Merge pull request #91 from hashicorp/f-indented-heredocs

Add support for indented HEREDOC terminators
2016-03-21 14:46:21 +00:00 · 2016-03-21 14:46:21 +00:00 · 2604f3bda7
commit 2604f3bda7
parent d27ef81edb 2584e26c89
6 changed files with 113 additions and 15 deletions
--- a/decoder_test.go
+++ b/decoder_test.go
@ -91,6 +91,16 @@ func TestDecode_interface(t *testing.T) {
 			false,
 			map[string]interface{}{"foo": testhelper.Unix2dos("bar\nbaz\n")},
 		},
+		{
+			"multiline_indented.hcl",
+			false,
+			map[string]interface{}{"foo": testhelper.Unix2dos("  bar\n  baz\n")},
+		},
+		{
+			"multiline_no_hanging_indent.hcl",
+			false,
+			map[string]interface{}{"foo": testhelper.Unix2dos("  baz\n    bar\n      foo\n")},
+		},
 		{
 			"multiline_no_eof.hcl",
 			false,
--- a/hcl/scanner/scanner.go
+++ b/hcl/scanner/scanner.go
@ -6,6 +6,7 @@ import (
 	"bytes"
 	"fmt"
 	"os"
+	"regexp"
 	"unicode"
 	"unicode/utf8"

@ -376,7 +377,7 @@ func (s *Scanner) scanExponent(ch rune) rune {
 	return ch
 }

-// scanHeredoc scans a heredoc string.
+// scanHeredoc scans a heredoc string
 func (s *Scanner) scanHeredoc() {
 	// Scan the second '<' in example: '<<EOF'
 	if s.next() != '<' {
@ -389,6 +390,12 @@ func (s *Scanner) scanHeredoc() {

 	// Scan the identifier
 	ch := s.next()
+
+	// Indented heredoc syntax
+	if ch == '-' {
+		ch = s.next()
+	}
+
 	for isLetter(ch) || isDigit(ch) {
 		ch = s.next()
 	}
@ -414,6 +421,17 @@ func (s *Scanner) scanHeredoc() {

 	// Read the identifier
 	identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
+	if len(identBytes) == 0 {
+		s.err("zero-length heredoc anchor")
+		return
+	}
+
+	var identRegexp *regexp.Regexp
+	if identBytes[0] == '-' {
+		identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes[1:]))
+	} else {
+		identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes))
+	}

 	// Read the actual string value
 	lineStart := s.srcPos.Offset
@ -422,12 +440,11 @@ func (s *Scanner) scanHeredoc() {

 		// Special newline handling.
 		if ch == '\n' {
-			// Math is fast, so we first compare the byte counts to
-			// see if we have a chance of seeing the same identifier. If those
-			// match, then we compare the string values directly.
+			// Math is fast, so we first compare the byte counts to see if we have a chance
+			// of seeing the same identifier - if the length is less than the number of bytes
+			// in the identifier, this cannot be a valid terminator.
 			lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
-			if lineBytesLen == len(identBytes) &&
-				bytes.Equal(identBytes, s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
+			if lineBytesLen >= len(identBytes) && identRegexp.Match(s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
 				break
 			}

--- a/hcl/scanner/scanner_test.go
+++ b/hcl/scanner/scanner_test.go
@ -5,8 +5,9 @@ import (
 	"fmt"
 	"testing"

-	"github.com/hashicorp/hcl/hcl/token"
 	"strings"
+
+	"github.com/hashicorp/hcl/hcl/token"
 )

 var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
@ -377,6 +378,14 @@ func TestRealExample(t *testing.T) {
 Main interface
 EOF
 	    }
+	    
+		network_interface {
+	        device_index = 1
+	        description = <<-EOF
+			Outer text
+				Indented text
+			EOF
+		}
 	}`

 	literals := []struct {
@ -435,6 +444,15 @@ EOF
 		{token.ASSIGN, `=`},
 		{token.HEREDOC, "<<EOF\nMain interface\nEOF\n"},
 		{token.RBRACE, `}`},
+		{token.IDENT, `network_interface`},
+		{token.LBRACE, `{`},
+		{token.IDENT, `device_index`},
+		{token.ASSIGN, `=`},
+		{token.NUMBER, `1`},
+		{token.IDENT, `description`},
+		{token.ASSIGN, `=`},
+		{token.HEREDOC, "<<-EOF\n\t\t\tOuter text\n\t\t\t\tIndented text\n\t\t\tEOF\n"},
+		{token.RBRACE, `}`},
 		{token.RBRACE, `}`},
 		{token.EOF, ``},
 	}
@ -447,7 +465,7 @@ EOF
 		}

 		if l.literal != tok.Text {
-			t.Errorf("got: %s want %s\n", tok, l.literal)
+			t.Errorf("got:\n%+v\n%s\n want:\n%+v\n%s\n", []byte(tok.String()), tok, []byte(l.literal), l.literal)
 		}
 	}

--- a/hcl/token/token.go
+++ b/hcl/token/token.go
@ -142,13 +142,7 @@ func (t Token) Value() interface{} {
 	case IDENT:
 		return t.Text
 	case HEREDOC:
-		// We need to find the end of the marker
-		idx := strings.IndexByte(t.Text, '\n')
-		if idx == -1 {
-			panic("heredoc doesn't contain newline")
-		}
-
-		return string(t.Text[idx+1 : len(t.Text)-idx+1])
+		return unindentHeredoc(t.Text)
 	case STRING:
 		// Determine the Unquote method to use. If it came from JSON,
 		// then we need to use the built-in unquote since we have to
@ -168,3 +162,53 @@ func (t Token) Value() interface{} {
 		panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
 	}
 }
+
+// unindentHeredoc returns the body of a HEREDOC token's text. For a heredoc opened
+// with << the body is returned unchanged. For one opened with <<- the whitespace before
+// the closing anchor is stripped from every line, but only when every line starts with it.
+func unindentHeredoc(heredoc string) string {
+	// Find the newline that ends the opening <<ANCHOR / <<-ANCHOR line; panic if there is none
+	idx := strings.IndexByte(heredoc, '\n')
+	if idx == -1 {
+		panic("heredoc doesn't contain newline")
+	}
+
+	unindent := heredoc[2] == '-'
+
+	// Plain << heredoc: return everything between the opening line and the closing anchor as is
+	if !unindent {
+		return string(heredoc[idx+1 : len(heredoc)-idx+1])
+	}
+
+	// Split the body into lines; the last element is the whitespace that precedes the closing anchor
+	lines := strings.Split(string(heredoc[idx+1:len(heredoc)-idx+2]), "\n")
+	whitespacePrefix := lines[len(lines)-1]
+
+	isIndented := true
+	for _, v := range lines {
+		if strings.HasPrefix(v, whitespacePrefix) {
+			continue
+		}
+
+		isIndented = false
+		break
+	}
+
+	// If any line does not start with the closing anchor's whitespace, return the body as
+	// written, trimming only the spaces and tabs that precede the anchor on the final line.
+	if !isIndented {
+		return strings.TrimRight(string(heredoc[idx+1:len(heredoc)-idx+1]), " \t")
+	}
+
+	unindentedLines := make([]string, len(lines))
+	for k, v := range lines {
+		if k == len(lines)-1 {
+			unindentedLines[k] = ""
+			break
+		}
+
+		unindentedLines[k] = strings.TrimPrefix(v, whitespacePrefix)
+	}
+
+	return strings.Join(unindentedLines, "\n")
+}
--- a/test-fixtures/multiline_indented.hcl
+++ b/test-fixtures/multiline_indented.hcl
@ -0,0 +1,4 @@
+foo = <<-EOF
+        bar
+        baz
+      EOF
--- a/test-fixtures/multiline_no_hanging_indent.hcl
+++ b/test-fixtures/multiline_no_hanging_indent.hcl
@ -0,0 +1,5 @@
+foo = <<-EOF
+  baz
+    bar
+      foo
+      EOF