hcl/scanner: scan heredocs

2015-11-10 14:01:56 -08:00 · 2015-11-10 14:01:56 -08:00 · 82ad2beb52
commit 82ad2beb52
parent e5d4045cf0
3 changed files with 78 additions and 8 deletions
--- a/hcl/scanner/scanner.go
+++ b/hcl/scanner/scanner.go
@ -174,6 +174,9 @@ func (s *Scanner) Scan() token.Token {
 				ch = s.scanMantissa(ch)
 				ch = s.scanExponent(ch)
 			}
+		case '<':
+			tok = token.HEREDOC
+			s.scanHeredoc()
 		case '[':
 			tok = token.LBRACK
 		case ']':
@ -371,6 +374,67 @@ func (s *Scanner) scanExponent(ch rune) rune {
 	return ch
 }

+// scanHeredoc scans "<<ANCHOR\n" and then body lines up to a line equal to ANCHOR; problems are reported via s.err.
+func (s *Scanner) scanHeredoc() {
+	// Consume the second '<' of the opener (example: '<<EOF'); anything else is an error.
+	if s.next() != '<' {
+		s.err("heredoc expected second '<', didn't see it")
+		return
+	}
+
+	// Remember the current offset so the anchor bytes can be sliced out of s.src below.
+	offs := s.srcPos.Offset
+
+	// Advance over the anchor: keep reading until next() returns something isLetter rejects.
+	ch := s.next()
+	for isLetter(ch) {
+		ch = s.next()
+	}
+
+	// EOF directly after the anchor means there is no body and no closing anchor.
+	if ch == eof {
+		s.err("heredoc not terminated")
+		return
+	}
+
+	// Only a newline may follow the anchor; any other non-letter character is rejected.
+	if ch != '\n' {
+		s.err("invalid characters in heredoc anchor")
+		return
+	}
+
+	// Slice out the anchor. NOTE(review): an empty anchor ("<<" + newline) appears to be accepted here — confirm.
+	identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
+
+	// Scan the body; lineStart is the offset where the current line begins. EOF before the anchor line is an error.
+	lineStart := s.srcPos.Offset
+	for {
+		ch := s.next()
+
+		// A newline ends the current line, so test whether that line is the closing anchor.
+		if ch == '\n' {
+			// Cheap check first: compare the line's byte length with the anchor's,
+			// and only compare the bytes themselves when the lengths match.
+			// NOTE(review): s.srcPos.Offset-s.lastCharLen presumably excludes the '\n' just read — confirm.
+			lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
+			if lineBytesLen == len(identBytes) &&
+				bytes.Equal(identBytes, s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
+				break
+			}
+
+			// Not an anchor match; the next line starts at the current offset.
+			lineStart = s.srcPos.Offset
+		}
+
+		if ch == eof {
+			s.err("heredoc not terminated")
+			return
+		}
+	}
+
+	return
+}
+
 // scanString scans a quoted string
 func (s *Scanner) scanString() {
 	braces := 0
--- a/hcl/scanner/scanner_test.go
+++ b/hcl/scanner/scanner_test.go
@ -71,6 +71,9 @@ var tokenLists = map[string][]tokenPair{
 		{token.IDENT, "foo६४"},
 		{token.IDENT, "bar９８７６"},
 	},
+	"heredoc": []tokenPair{
+		{token.HEREDOC, "<<EOF\nhello\nworld\nEOF"},
+	},
 	"string": []tokenPair{
 		{token.STRING, `" "`},
 		{token.STRING, `"a"`},
@ -229,6 +232,7 @@ var orderedTokenLists = []string{
 	"operator",
 	"bool",
 	"ident",
+	"heredoc",
 	"string",
 	"number",
 	"float",
--- a/hcl/token/token.go
+++ b/hcl/token/token.go
@ -37,10 +37,11 @@ const (
 	identifier_end

 	operator_beg
-	LBRACK // [
-	LBRACE // {
-	COMMA  // ,
-	PERIOD // .
+	LBRACK  // [
+	LBRACE  // {
+	COMMA   // ,
+	PERIOD  // .
+	HEREDOC // <<

 	RBRACK // ]
 	RBRACE // }
@ -63,10 +64,11 @@ var tokens = [...]string{
 	BOOL:   "BOOL",
 	STRING: "STRING",

-	LBRACK: "LBRACK",
-	LBRACE: "LBRACE",
-	COMMA:  "COMMA",
-	PERIOD: "PERIOD",
+	LBRACK:  "LBRACK",
+	LBRACE:  "LBRACE",
+	COMMA:   "COMMA",
+	PERIOD:  "PERIOD",
+	HEREDOC: "HEREDOC",

 	RBRACK: "RBRACK",
 	RBRACE: "RBRACE",