hcl/hclsyntax: Fix up parsing of flush heredocs
This was implemented a long time ago in the original template parser, but it was missed when the template parser was rewritten to use a two-stage parsing strategy. It is now implemented as a post-processing step between those two stages: the first stage produces a flat sequence of literal strings, interpolation markers, and control markers, and the second stage matches opening and closing control markers to produce an expression AST. It's important to do this at parse time rather than at eval time, since it is the static layout of the source code that decides the indentation level; an interpolation marker at the start of a line whose result itself begins with spaces therefore does not affect the outcome.
This commit is contained in:
parent
c33bbe4c25
commit
e8dbb16dbc
@ -1163,6 +1163,62 @@ EOT
|
||||
cty.TupleVal([]cty.Value{cty.StringVal(" Foo\n Bar\n Baz\n")}),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`[
|
||||
<<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
]
|
||||
`,
|
||||
nil,
|
||||
cty.TupleVal([]cty.Value{cty.StringVal("Foo\nBar\nBaz\n")}),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`[
|
||||
<<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
]
|
||||
`,
|
||||
nil,
|
||||
cty.TupleVal([]cty.Value{cty.StringVal("Foo\n Bar\n Baz\n")}),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`[
|
||||
<<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
]
|
||||
`,
|
||||
nil,
|
||||
cty.TupleVal([]cty.Value{cty.StringVal(" Foo\nBar\n Baz\n")}),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`[
|
||||
<<-EOT
|
||||
Foo
|
||||
${bar}
|
||||
Baz
|
||||
EOT
|
||||
]
|
||||
`,
|
||||
&hcl.EvalContext{
|
||||
Variables: map[string]cty.Value{
|
||||
"bar": cty.StringVal(" Bar"), // Spaces in the interpolation result don't affect the outcome
|
||||
},
|
||||
},
|
||||
cty.TupleVal([]cty.Value{cty.StringVal(" Foo\n Bar\n Baz\n")}),
|
||||
0,
|
||||
},
|
||||
|
||||
{
|
||||
`unk["baz"]`,
|
||||
|
@ -860,7 +860,7 @@ func (p *parser) parseExpressionTerm() (Expression, hcl.Diagnostics) {
|
||||
case TokenOQuote, TokenOHeredoc:
|
||||
open := p.Read() // eat opening marker
|
||||
closer := p.oppositeBracket(open.Type)
|
||||
exprs, passthru, _, diags := p.parseTemplateInner(closer)
|
||||
exprs, passthru, _, diags := p.parseTemplateInner(closer, tokenOpensFlushHeredoc(open))
|
||||
|
||||
closeRange := p.PrevRange()
|
||||
|
||||
|
@ -2,6 +2,7 @@ package hclsyntax
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
@ -10,11 +11,11 @@ import (
|
||||
)
|
||||
|
||||
func (p *parser) ParseTemplate() (Expression, hcl.Diagnostics) {
|
||||
return p.parseTemplate(TokenEOF)
|
||||
return p.parseTemplate(TokenEOF, false)
|
||||
}
|
||||
|
||||
func (p *parser) parseTemplate(end TokenType) (Expression, hcl.Diagnostics) {
|
||||
exprs, passthru, rng, diags := p.parseTemplateInner(end)
|
||||
func (p *parser) parseTemplate(end TokenType, flushHeredoc bool) (Expression, hcl.Diagnostics) {
|
||||
exprs, passthru, rng, diags := p.parseTemplateInner(end, flushHeredoc)
|
||||
|
||||
if passthru {
|
||||
if len(exprs) != 1 {
|
||||
@ -32,8 +33,11 @@ func (p *parser) parseTemplate(end TokenType) (Expression, hcl.Diagnostics) {
|
||||
}, diags
|
||||
}
|
||||
|
||||
func (p *parser) parseTemplateInner(end TokenType) ([]Expression, bool, hcl.Range, hcl.Diagnostics) {
|
||||
func (p *parser) parseTemplateInner(end TokenType, flushHeredoc bool) ([]Expression, bool, hcl.Range, hcl.Diagnostics) {
|
||||
parts, diags := p.parseTemplateParts(end)
|
||||
if flushHeredoc {
|
||||
flushHeredocTemplateParts(parts) // Trim off leading spaces on lines per the flush heredoc spec
|
||||
}
|
||||
tp := templateParser{
|
||||
Tokens: parts.Tokens,
|
||||
SrcRange: parts.SrcRange,
|
||||
@ -649,6 +653,73 @@ Token:
|
||||
return ret, diags
|
||||
}
|
||||
|
||||
// flushHeredocTemplateParts modifies in-place the line-leading literal strings
// to apply the flush heredoc processing rule: find the line with the smallest
// number of whitespace characters as prefix and then trim that number of
// characters from all of the lines.
//
// This rule is applied to static tokens rather than to the rendered result,
// so interpolating a string with leading whitespace cannot affect the chosen
// prefix length.
func flushHeredocTemplateParts(parts *templateParts) {
	if len(parts.Tokens) == 0 {
		// Nothing to do
		return
	}

	const maxInt = int((^uint(0)) >> 1)

	// First pass: find the smallest leading-whitespace prefix (counted in
	// grapheme clusters, not bytes) among all lines, and remember which
	// literal tokens will need trimming in the second pass.
	minSpaces := maxInt
	newline := true // true while the next token begins a new source line
	var adjust []*templateLiteralToken
	for _, ttok := range parts.Tokens {
		if newline {
			newline = false
			// NOTE: spaces stays 0 if the line begins with a non-literal
			// token (e.g. an interpolation marker), which forces minSpaces
			// to 0 and thus no trimming at all for that template.
			var spaces int
			if lit, ok := ttok.(*templateLiteralToken); ok {
				orig := lit.Val
				// TrimLeftFunc with unicode.IsSpace also strips a newline,
				// so a whitespace-only line trims down to the empty string.
				trimmed := strings.TrimLeftFunc(orig, unicode.IsSpace)
				// If a token is entirely spaces and ends with a newline
				// then it's a "blank line" and thus not considered for
				// space-prefix-counting purposes.
				if len(trimmed) == 0 && strings.HasSuffix(orig, "\n") {
					spaces = maxInt
				} else {
					// Count the prefix in characters (grapheme clusters),
					// not bytes, so multi-byte whitespace counts once.
					spaceBytes := len(lit.Val) - len(trimmed)
					spaces, _ = textseg.TokenCount([]byte(orig[:spaceBytes]), textseg.ScanGraphemeClusters)
					adjust = append(adjust, lit)
				}
			} else if _, ok := ttok.(*templateEndToken); ok {
				break // don't process the end token since it never has spaces before it
			}
			if spaces < minSpaces {
				minSpaces = spaces
			}
		}
		if lit, ok := ttok.(*templateLiteralToken); ok {
			if strings.HasSuffix(lit.Val, "\n") {
				newline = true // The following token, if any, begins a new line
			}
		}
	}

	// Second pass: trim minSpaces characters from each line-leading literal
	// collected above, fixing up its source range to match.
	for _, lit := range adjust {
		// Since we want to count space _characters_ rather than space _bytes_,
		// we can't just do a straightforward slice operation here and instead
		// need to hunt for the split point with a scanner.
		valBytes := []byte(lit.Val)
		spaceByteCount := 0
		for i := 0; i < minSpaces; i++ {
			adv, _, _ := textseg.ScanGraphemeClusters(valBytes, true)
			spaceByteCount += adv
			valBytes = valBytes[adv:]
		}
		lit.Val = lit.Val[spaceByteCount:]
		// Column moves by character count; Byte moves by byte count.
		lit.SrcRange.Start.Column += minSpaces
		lit.SrcRange.Start.Byte += spaceByteCount
	}
}
|
||||
|
||||
type templateParts struct {
|
||||
Tokens []templateToken
|
||||
SrcRange hcl.Range
|
||||
|
@ -1,6 +1,7 @@
|
||||
package hclsyntax
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
@ -161,6 +162,13 @@ type heredocInProgress struct {
|
||||
StartOfLine bool
|
||||
}
|
||||
|
||||
func tokenOpensFlushHeredoc(tok Token) bool {
|
||||
if tok.Type != TokenOHeredoc {
|
||||
return false
|
||||
}
|
||||
return bytes.HasPrefix(tok.Bytes, []byte{'<', '<', '-'})
|
||||
}
|
||||
|
||||
// checkInvalidTokens does a simple pass across the given tokens and generates
|
||||
// diagnostics for tokens that should _never_ appear in HCL source. This
|
||||
// is intended to avoid the need for the parser to have special support
|
||||
|
73
specsuite/tests/expressions/heredoc.hcl
Normal file
73
specsuite/tests/expressions/heredoc.hcl
Normal file
@ -0,0 +1,73 @@
|
||||
normal = {
|
||||
basic = <<EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
indented = <<EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
indented_more = <<EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
interp = <<EOT
|
||||
Foo
|
||||
${bar}
|
||||
Baz
|
||||
EOT
|
||||
|
||||
marker_at_suffix = <<EOT
|
||||
NOT EOT
|
||||
EOT
|
||||
}
|
||||
flush = {
|
||||
basic = <<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
indented = <<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
indented_more = <<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
indented_less = <<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
interp = <<-EOT
|
||||
Foo
|
||||
${bar}
|
||||
Baz
|
||||
EOT
|
||||
interp_indented_more = <<-EOT
|
||||
Foo
|
||||
${bar}
|
||||
Baz
|
||||
EOT
|
||||
interp_indented_less = <<-EOT
|
||||
Foo
|
||||
${space_bar}
|
||||
Baz
|
||||
EOT
|
||||
tabs = <<-EOT
|
||||
Foo
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
unicode_spaces = <<-EOT
|
||||
Foo (there's two "em spaces" before Foo there)
|
||||
Bar
|
||||
Baz
|
||||
EOT
|
||||
}
|
14
specsuite/tests/expressions/heredoc.hcldec
Normal file
14
specsuite/tests/expressions/heredoc.hcldec
Normal file
@ -0,0 +1,14 @@
|
||||
variables {
|
||||
bar = "Bar"
|
||||
space_bar = " Bar"
|
||||
words = ["Foo", "Bar", "Baz"]
|
||||
}
|
||||
|
||||
object {
|
||||
attr "normal" {
|
||||
type = map(string)
|
||||
}
|
||||
attr "flush" {
|
||||
type = map(string)
|
||||
}
|
||||
}
|
25
specsuite/tests/expressions/heredoc.t
Normal file
25
specsuite/tests/expressions/heredoc.t
Normal file
@ -0,0 +1,25 @@
|
||||
result = {
|
||||
normal = {
|
||||
basic = "Foo\nBar\nBaz\n"
|
||||
indented = " Foo\n Bar\n Baz\n"
|
||||
indented_more = " Foo\n Bar\n Baz\n"
|
||||
interp = " Foo\n Bar\n Baz\n"
|
||||
|
||||
marker_at_suffix = " NOT EOT\n"
|
||||
}
|
||||
flush = {
|
||||
basic = "Foo\nBar\nBaz\n"
|
||||
indented = "Foo\nBar\nBaz\n"
|
||||
indented_more = "Foo\n Bar\nBaz\n"
|
||||
indented_less = " Foo\nBar\n Baz\n"
|
||||
interp = "Foo\nBar\nBaz\n"
|
||||
interp_indented_more = "Foo\n Bar\nBaz\n"
|
||||
interp_indented_less = " Foo\n Bar\n Baz\n"
|
||||
tabs = "Foo\n Bar\n Baz\n"
|
||||
unicode_spaces = " Foo (there's two \"em spaces\" before Foo there)\nBar\nBaz\n"
|
||||
}
|
||||
}
|
||||
result_type = object({
|
||||
normal = map(string)
|
||||
flush = map(string)
|
||||
})
|
Loading…
Reference in New Issue
Block a user