hcl/hclsyntax: Fix up parsing of flush heredocs

Flush heredoc trimming was implemented a long time ago in the original
template parser, but it was missed when the template parser was rewritten
to use a two-stage parsing strategy.

It's implemented as a post-processing step on the result of the first
stage of parsing, which produces a flat sequence of literal strings,
interpolation markers, and control markers. The step runs before the
second stage, which matches opening and closing control markers to
produce an expression AST.
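
As a rough sketch, not part of this commit, here is an in-package test that exercises the new flushHeredocTemplateParts against a hand-built stage-one token stream. It assumes the templateLiteralToken and templateEndToken types shown in the diff below can be constructed directly; the literal values are invented for illustration.

package hclsyntax

import "testing"

func TestFlushHeredocTemplatePartsSketch(t *testing.T) {
	parts := &templateParts{
		Tokens: []templateToken{
			&templateLiteralToken{Val: "    Foo\n"},
			&templateLiteralToken{Val: "      Bar\n"},
			&templateLiteralToken{Val: "    Baz\n"},
			&templateEndToken{},
		},
	}
	flushHeredocTemplateParts(parts)

	// The smallest leading-space count across the lines is four, so four
	// characters should be trimmed from each line-leading literal.
	got := parts.Tokens[1].(*templateLiteralToken).Val
	if got != "  Bar\n" {
		t.Errorf("got %q; want %q", got, "  Bar\n")
	}
}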

It's important to do this at parse time rather than at eval time, since
it is the static layout of the source code that decides the indentation
level; an interpolation marker at the start of a line whose result happens
to begin with spaces therefore does not affect how much indentation is
trimmed.
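
For illustration, a minimal standalone program, not part of this change and assuming the hcl2-era import paths, that exercises the rule end to end: the ${bar} line is trimmed according to its two literal leading spaces, while the spaces inside the interpolated value pass through unchanged. The attribute name "greeting" is invented for the example.

package main

import (
	"fmt"

	"github.com/hashicorp/hcl2/hcl"
	"github.com/hashicorp/hcl2/hcl/hclsyntax"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	src := `greeting = <<-EOT
  Foo
  ${bar}
  Baz
EOT
`
	f, diags := hclsyntax.ParseConfig([]byte(src), "example.hcl", hcl.Pos{Line: 1, Column: 1})
	if diags.HasErrors() {
		panic(diags.Error())
	}
	attr := f.Body.(*hclsyntax.Body).Attributes["greeting"]

	ctx := &hcl.EvalContext{
		Variables: map[string]cty.Value{
			// Leading spaces here are part of the value, not of the source
			// layout, so they don't change how much indentation is trimmed.
			"bar": cty.StringVal("    Bar"),
		},
	}
	val, _ := attr.Expr.Value(ctx)
	fmt.Printf("%q\n", val.AsString()) // expected: "Foo\n    Bar\nBaz\n"
}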
Martin Atkins 2018-12-13 17:22:41 -08:00
parent c33bbe4c25
commit e8dbb16dbc
7 changed files with 252 additions and 5 deletions


@@ -1163,6 +1163,62 @@ EOT
cty.TupleVal([]cty.Value{cty.StringVal(" Foo\n Bar\n Baz\n")}),
0,
},
{
`[
<<-EOT
Foo
Bar
Baz
EOT
]
`,
nil,
cty.TupleVal([]cty.Value{cty.StringVal("Foo\nBar\nBaz\n")}),
0,
},
{
`[
<<-EOT
Foo
Bar
Baz
EOT
]
`,
nil,
cty.TupleVal([]cty.Value{cty.StringVal("Foo\n Bar\n Baz\n")}),
0,
},
{
`[
<<-EOT
Foo
Bar
Baz
EOT
]
`,
nil,
cty.TupleVal([]cty.Value{cty.StringVal(" Foo\nBar\n Baz\n")}),
0,
},
{
`[
<<-EOT
Foo
${bar}
Baz
EOT
]
`,
&hcl.EvalContext{
Variables: map[string]cty.Value{
"bar": cty.StringVal(" Bar"), // Spaces in the interpolation result don't affect the outcome
},
},
cty.TupleVal([]cty.Value{cty.StringVal(" Foo\n Bar\n Baz\n")}),
0,
},
{
`unk["baz"]`,


@@ -860,7 +860,7 @@ func (p *parser) parseExpressionTerm() (Expression, hcl.Diagnostics) {
case TokenOQuote, TokenOHeredoc:
open := p.Read() // eat opening marker
closer := p.oppositeBracket(open.Type)
exprs, passthru, _, diags := p.parseTemplateInner(closer)
exprs, passthru, _, diags := p.parseTemplateInner(closer, tokenOpensFlushHeredoc(open))
closeRange := p.PrevRange()


@@ -2,6 +2,7 @@ package hclsyntax
import (
"fmt"
"github.com/apparentlymart/go-textseg/textseg"
"strings"
"unicode"
@@ -10,11 +11,11 @@ import (
)
func (p *parser) ParseTemplate() (Expression, hcl.Diagnostics) {
return p.parseTemplate(TokenEOF)
return p.parseTemplate(TokenEOF, false)
}
func (p *parser) parseTemplate(end TokenType) (Expression, hcl.Diagnostics) {
exprs, passthru, rng, diags := p.parseTemplateInner(end)
func (p *parser) parseTemplate(end TokenType, flushHeredoc bool) (Expression, hcl.Diagnostics) {
exprs, passthru, rng, diags := p.parseTemplateInner(end, flushHeredoc)
if passthru {
if len(exprs) != 1 {
@@ -32,8 +33,11 @@ func (p *parser) parseTemplate(end TokenType) (Expression, hcl.Diagnostics) {
}, diags
}
func (p *parser) parseTemplateInner(end TokenType) ([]Expression, bool, hcl.Range, hcl.Diagnostics) {
func (p *parser) parseTemplateInner(end TokenType, flushHeredoc bool) ([]Expression, bool, hcl.Range, hcl.Diagnostics) {
parts, diags := p.parseTemplateParts(end)
if flushHeredoc {
flushHeredocTemplateParts(parts) // Trim off leading spaces on lines per the flush heredoc spec
}
tp := templateParser{
Tokens: parts.Tokens,
SrcRange: parts.SrcRange,
@@ -649,6 +653,73 @@ Token:
return ret, diags
}
// flushHeredocTemplateParts modifies in-place the line-leading literal strings
// to apply the flush heredoc processing rule: find the line with the smallest
// number of whitespace characters as prefix and then trim that number of
// characters from all of the lines.
//
// This rule is applied to static tokens rather than to the rendered result,
// so interpolating a string with leading whitespace cannot affect the chosen
// prefix length.
func flushHeredocTemplateParts(parts *templateParts) {
	if len(parts.Tokens) == 0 {
		// Nothing to do
		return
	}

	const maxInt = int((^uint(0)) >> 1)

	minSpaces := maxInt
	newline := true
	var adjust []*templateLiteralToken
	for _, ttok := range parts.Tokens {
		if newline {
			newline = false
			var spaces int
			if lit, ok := ttok.(*templateLiteralToken); ok {
				orig := lit.Val
				trimmed := strings.TrimLeftFunc(orig, unicode.IsSpace)
				// If a token is entirely spaces and ends with a newline
				// then it's a "blank line" and thus not considered for
				// space-prefix-counting purposes.
				if len(trimmed) == 0 && strings.HasSuffix(orig, "\n") {
					spaces = maxInt
				} else {
					spaceBytes := len(lit.Val) - len(trimmed)
					spaces, _ = textseg.TokenCount([]byte(orig[:spaceBytes]), textseg.ScanGraphemeClusters)
					adjust = append(adjust, lit)
				}
			} else if _, ok := ttok.(*templateEndToken); ok {
				break // don't process the end token since it never has spaces before it
			}
			if spaces < minSpaces {
				minSpaces = spaces
			}
		}
		if lit, ok := ttok.(*templateLiteralToken); ok {
			if strings.HasSuffix(lit.Val, "\n") {
				newline = true // The following token, if any, begins a new line
			}
		}
	}

	for _, lit := range adjust {
		// Since we want to count space _characters_ rather than space _bytes_,
		// we can't just do a straightforward slice operation here and instead
		// need to hunt for the split point with a scanner.
		valBytes := []byte(lit.Val)
		spaceByteCount := 0
		for i := 0; i < minSpaces; i++ {
			adv, _, _ := textseg.ScanGraphemeClusters(valBytes, true)
			spaceByteCount += adv
			valBytes = valBytes[adv:]
		}
		lit.Val = lit.Val[spaceByteCount:]
		lit.SrcRange.Start.Column += minSpaces
		lit.SrcRange.Start.Byte += spaceByteCount
	}
}
type templateParts struct {
	Tokens   []templateToken
	SrcRange hcl.Range

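Aside, not part of the diff: the reason flushHeredocTemplateParts counts grapheme clusters rather than bytes is that a single space character can occupy several bytes of UTF-8, as in the unicode_spaces fixture further down. A quick standalone check of the textseg call it relies on:

package main

import (
	"fmt"

	"github.com/apparentlymart/go-textseg/textseg"
)

func main() {
	// Two U+2003 "em space" characters: six bytes of UTF-8, but only two
	// characters' worth of indentation to trim.
	prefix := "\u2003\u2003"
	chars, _ := textseg.TokenCount([]byte(prefix), textseg.ScanGraphemeClusters)
	fmt.Println(len(prefix), chars) // prints: 6 2
}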

@@ -1,6 +1,7 @@
package hclsyntax
import (
"bytes"
"fmt"
"github.com/apparentlymart/go-textseg/textseg"
@@ -161,6 +162,13 @@ type heredocInProgress struct {
StartOfLine bool
}
func tokenOpensFlushHeredoc(tok Token) bool {
if tok.Type != TokenOHeredoc {
return false
}
return bytes.HasPrefix(tok.Bytes, []byte{'<', '<', '-'})
}
// checkInvalidTokens does a simple pass across the given tokens and generates
// diagnostics for tokens that should _never_ appear in HCL source. This
// is intended to avoid the need for the parser to have special support

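Another small in-package sketch, not part of the commit, showing how the helper above distinguishes flush heredocs from normal ones purely by the <<- introducer bytes; it assumes the Token struct's exported Type and Bytes fields:

package hclsyntax

import "testing"

func TestTokenOpensFlushHeredocSketch(t *testing.T) {
	flush := Token{Type: TokenOHeredoc, Bytes: []byte("<<-EOT\n")}
	normal := Token{Type: TokenOHeredoc, Bytes: []byte("<<EOT\n")}
	if !tokenOpensFlushHeredoc(flush) || tokenOpensFlushHeredoc(normal) {
		t.Fatal("only the <<- introducer should be treated as a flush heredoc")
	}
}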

@@ -0,0 +1,73 @@
normal = {
basic = <<EOT
Foo
Bar
Baz
EOT
indented = <<EOT
Foo
Bar
Baz
EOT
indented_more = <<EOT
Foo
Bar
Baz
EOT
interp = <<EOT
Foo
${bar}
Baz
EOT
marker_at_suffix = <<EOT
NOT EOT
EOT
}
flush = {
basic = <<-EOT
Foo
Bar
Baz
EOT
indented = <<-EOT
Foo
Bar
Baz
EOT
indented_more = <<-EOT
Foo
Bar
Baz
EOT
indented_less = <<-EOT
Foo
Bar
Baz
EOT
interp = <<-EOT
Foo
${bar}
Baz
EOT
interp_indented_more = <<-EOT
Foo
${bar}
Baz
EOT
interp_indented_less = <<-EOT
Foo
${space_bar}
Baz
EOT
tabs = <<-EOT
Foo
Bar
Baz
EOT
unicode_spaces = <<-EOT
Foo (there's two "em spaces" before Foo there)
Bar
Baz
EOT
}


@@ -0,0 +1,14 @@
variables {
bar = "Bar"
space_bar = " Bar"
words = ["Foo", "Bar", "Baz"]
}
object {
attr "normal" {
type = map(string)
}
attr "flush" {
type = map(string)
}
}


@@ -0,0 +1,25 @@
result = {
normal = {
basic = "Foo\nBar\nBaz\n"
indented = " Foo\n Bar\n Baz\n"
indented_more = " Foo\n Bar\n Baz\n"
interp = " Foo\n Bar\n Baz\n"
marker_at_suffix = " NOT EOT\n"
}
flush = {
basic = "Foo\nBar\nBaz\n"
indented = "Foo\nBar\nBaz\n"
indented_more = "Foo\n Bar\nBaz\n"
indented_less = " Foo\nBar\n Baz\n"
interp = "Foo\nBar\nBaz\n"
interp_indented_more = "Foo\n Bar\nBaz\n"
interp_indented_less = " Foo\n Bar\n Baz\n"
tabs = "Foo\n Bar\n Baz\n"
unicode_spaces = "Foo (there's two \"em spaces\" before Foo there)\nBar\nBaz\n"
}
}
result_type = object({
normal = map(string)
flush = map(string)
})