Merge pull request #91 from hashicorp/f-indented-heredocs

Add support for indented HEREDOC terminators
This commit is contained in:
James Nugent 2016-03-21 14:46:21 +00:00
commit 2604f3bda7
6 changed files with 113 additions and 15 deletions

View File

@ -91,6 +91,16 @@ func TestDecode_interface(t *testing.T) {
false,
map[string]interface{}{"foo": testhelper.Unix2dos("bar\nbaz\n")},
},
{
"multiline_indented.hcl",
false,
map[string]interface{}{"foo": testhelper.Unix2dos(" bar\n baz\n")},
},
{
"multiline_no_hanging_indent.hcl",
false,
map[string]interface{}{"foo": testhelper.Unix2dos(" baz\n bar\n foo\n")},
},
{
"multiline_no_eof.hcl",
false,

View File

@ -6,6 +6,7 @@ import (
"bytes"
"fmt"
"os"
"regexp"
"unicode"
"unicode/utf8"
@ -376,7 +377,7 @@ func (s *Scanner) scanExponent(ch rune) rune {
return ch
}
// scanHeredoc scans a heredoc string.
// scanHeredoc scans a heredoc string
func (s *Scanner) scanHeredoc() {
// Scan the second '<' in example: '<<EOF'
if s.next() != '<' {
@ -389,6 +390,12 @@ func (s *Scanner) scanHeredoc() {
// Scan the identifier
ch := s.next()
// Indented heredoc syntax
if ch == '-' {
ch = s.next()
}
for isLetter(ch) || isDigit(ch) {
ch = s.next()
}
@ -414,6 +421,17 @@ func (s *Scanner) scanHeredoc() {
// Read the identifier
identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
if len(identBytes) == 0 {
s.err("zero-length heredoc anchor")
return
}
var identRegexp *regexp.Regexp
if identBytes[0] == '-' {
identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes[1:]))
} else {
identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes))
}
// Read the actual string value
lineStart := s.srcPos.Offset
@ -422,12 +440,11 @@ func (s *Scanner) scanHeredoc() {
// Special newline handling.
if ch == '\n' {
// Math is fast, so we first compare the byte counts to
// see if we have a chance of seeing the same identifier. If those
// match, then we compare the string values directly.
// Math is fast, so we first compare the byte counts to see if we have a chance
// of seeing the same identifier - if the length is less than the number of bytes
// in the identifier, this cannot be a valid terminator.
lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
if lineBytesLen == len(identBytes) &&
bytes.Equal(identBytes, s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
if lineBytesLen >= len(identBytes) && identRegexp.Match(s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
break
}

View File

@ -5,8 +5,9 @@ import (
"fmt"
"testing"
"github.com/hashicorp/hcl/hcl/token"
"strings"
"github.com/hashicorp/hcl/hcl/token"
)
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
@ -377,6 +378,14 @@ func TestRealExample(t *testing.T) {
Main interface
EOF
}
network_interface {
device_index = 1
description = <<-EOF
Outer text
Indented text
EOF
}
}`
literals := []struct {
@ -435,6 +444,15 @@ EOF
{token.ASSIGN, `=`},
{token.HEREDOC, "<<EOF\nMain interface\nEOF\n"},
{token.RBRACE, `}`},
{token.IDENT, `network_interface`},
{token.LBRACE, `{`},
{token.IDENT, `device_index`},
{token.ASSIGN, `=`},
{token.NUMBER, `1`},
{token.IDENT, `description`},
{token.ASSIGN, `=`},
{token.HEREDOC, "<<-EOF\n\t\t\tOuter text\n\t\t\t\tIndented text\n\t\t\tEOF\n"},
{token.RBRACE, `}`},
{token.RBRACE, `}`},
{token.EOF, ``},
}
@ -447,7 +465,7 @@ EOF
}
if l.literal != tok.Text {
t.Errorf("got: %s want %s\n", tok, l.literal)
t.Errorf("got:\n%+v\n%s\n want:\n%+v\n%s\n", []byte(tok.String()), tok, []byte(l.literal), l.literal)
}
}

View File

@ -142,13 +142,7 @@ func (t Token) Value() interface{} {
case IDENT:
return t.Text
case HEREDOC:
// We need to find the end of the marker
idx := strings.IndexByte(t.Text, '\n')
if idx == -1 {
panic("heredoc doesn't contain newline")
}
return string(t.Text[idx+1 : len(t.Text)-idx+1])
return unindentHeredoc(t.Text)
case STRING:
// Determine the Unquote method to use. If it came from JSON,
// then we need to use the built-in unquote since we have to
@ -168,3 +162,53 @@ func (t Token) Value() interface{} {
panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
}
}
// unindentHeredoc returns the string content of a HEREDOC if it is started with <<
// and the content of a HEREDOC with the hanging indent removed if it is started with
// a <<-, and the terminating line is at least as indented as the least indented line.
func unindentHeredoc(heredoc string) string {
// We need to find the end of the marker
idx := strings.IndexByte(heredoc, '\n')
if idx == -1 {
panic("heredoc doesn't contain newline")
}
unindent := heredoc[2] == '-'
// We can optimize if the heredoc isn't marked for indentation
if !unindent {
return string(heredoc[idx+1 : len(heredoc)-idx+1])
}
// We need to unindent each line based on the indentation level of the marker
lines := strings.Split(string(heredoc[idx+1:len(heredoc)-idx+2]), "\n")
whitespacePrefix := lines[len(lines)-1]
isIndented := true
for _, v := range lines {
if strings.HasPrefix(v, whitespacePrefix) {
continue
}
isIndented = false
break
}
// If all lines are not at least as indented as the terminating mark, return the
// heredoc as is, but trim the leading space from the marker on the final line.
if !isIndented {
return strings.TrimRight(string(heredoc[idx+1:len(heredoc)-idx+1]), " \t")
}
unindentedLines := make([]string, len(lines))
for k, v := range lines {
if k == len(lines)-1 {
unindentedLines[k] = ""
break
}
unindentedLines[k] = strings.TrimPrefix(v, whitespacePrefix)
}
return strings.Join(unindentedLines, "\n")
}

View File

@ -0,0 +1,4 @@
foo = <<-EOF
bar
baz
EOF

View File

@ -0,0 +1,5 @@
foo = <<-EOF
baz
bar
foo
EOF