zclsyntax: reorganize template parsing
Previously we tried to do the whole template parse in one pass. This was adequate for dealing with literals and interpolations, because they form a flat structure, but parsing the template control sequences requires dealing with nested template sequences. As a building block towards that, we now first do a pass that extracts the template-level "tokens": literals, interpolations, and control sequences. We then pass over that flat sequence of tokens and parse it, which is simpler because the larger template atoms have already been produced.
This commit is contained in:
parent
d90da0c4ba
commit
2f1bfd284c
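To illustrate the shape of the two passes, here is a minimal, self-contained sketch of the same idea on a toy input. It is not the code from this commit: the names (tmplToken, litToken, interpToken, extractTokens, buildParts) are invented for the illustration, and the real implementation below (in parser_template.go) works on the lexer's Token stream and produces templateToken values rather than strings.

package main

import (
    "fmt"
    "strings"
)

// Invented stand-ins for the commit's templateToken, templateLiteralToken,
// templateInterpToken and templateEndToken.
type tmplToken interface{ isTmplToken() }

type litToken struct{ val string }
type interpToken struct{ expr string } // the real token carries a parsed Expression
type endToken struct{}

func (litToken) isTmplToken()    {}
func (interpToken) isTmplToken() {}
func (endToken) isTmplToken()    {}

// Pass 1: raise the raw input to a flat sequence of template-level tokens,
// always terminated by a synthetic end marker.
func extractTokens(chunks []string) []tmplToken {
    var toks []tmplToken
    for _, c := range chunks {
        if strings.HasPrefix(c, "${") && strings.HasSuffix(c, "}") {
            toks = append(toks, interpToken{expr: c[2 : len(c)-1]})
        } else {
            toks = append(toks, litToken{val: c})
        }
    }
    return append(toks, endToken{})
}

// Pass 2: walk the flat token sequence and build the template's parts.
func buildParts(toks []tmplToken) []string {
    var parts []string
    for _, t := range toks {
        switch tok := t.(type) {
        case litToken:
            parts = append(parts, fmt.Sprintf("literal %q", tok.val))
        case interpToken:
            parts = append(parts, "interp("+tok.expr+")")
        case endToken:
            return parts
        }
    }
    return parts
}

func main() {
    toks := extractTokens([]string{"hello ", "${name}", "!"})
    fmt.Println(buildParts(toks)) // [literal "hello " interp(name) literal "!"]
}

The second pass is trivial here, but because it operates on whole template atoms rather than raw lexer tokens, it is the natural place to later recognize nested control sequences such as if/for directives.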
zcl/zclsyntax/parser.go
@@ -4,8 +4,6 @@ import (
    "bufio"
    "bytes"
    "fmt"
    "strings"
    "unicode"

    "github.com/apparentlymart/go-textseg/textseg"
    "github.com/zclconf/go-cty/cty"
@@ -763,12 +761,12 @@ func (p *parser) parseExpressionTerm() (Expression, zcl.Diagnostics) {
    case TokenOQuote, TokenOHeredoc:
        open := p.Read() // eat opening marker
        closer := p.oppositeBracket(open.Type)
        parts, unwrap, diags := p.parseTemplateParts(closer)
        exprs, unwrap, _, diags := p.parseTemplateInner(closer)

        closeRange := p.PrevRange()

        return &TemplateExpr{
            Parts:  parts,
            Parts:  exprs,
            Unwrap: unwrap,

            SrcRange: zcl.RangeBetween(open.Range, closeRange),
@@ -1342,142 +1340,6 @@ func (p *parser) finishParsingForExpr(open Token) (Expression, zcl.Diagnostics)
    }, diags
}

func (p *parser) ParseTemplate() (Expression, zcl.Diagnostics) {
    startRange := p.NextRange()
    parts, unwrap, diags := p.parseTemplateParts(TokenEOF)
    endRange := p.PrevRange()

    return &TemplateExpr{
        Parts:  parts,
        Unwrap: unwrap,

        SrcRange: zcl.RangeBetween(startRange, endRange),
    }, diags
}

// parseTemplateParts parses the expressions that make up the content of a
// template, up to the given closing delimiter. It also returns a flag that
// is true if the first part should be returned as-is, or false if the
// full set of parts should be wrapped in a TemplateExpr to return.
//
// The wrapping is done separately by the caller so that any template
// delimiters can be included in the template's source range.
func (p *parser) parseTemplateParts(end TokenType) ([]Expression, bool, zcl.Diagnostics) {
    var parts []Expression
    var diags zcl.Diagnostics

    startRange := p.NextRange()
    ltrimNext := false
    nextCanTrimPrev := false

Token:
    for {
        next := p.Read()
        if next.Type == end {
            // all done!
            break
        }

        ltrim := ltrimNext
        ltrimNext = false
        canTrimPrev := nextCanTrimPrev
        nextCanTrimPrev = false

        switch next.Type {
        case TokenStringLit, TokenQuotedLit:
            str, strDiags := p.decodeStringLit(next)
            diags = append(diags, strDiags...)

            if ltrim {
                str = strings.TrimLeftFunc(str, unicode.IsSpace)
            }

            parts = append(parts, &LiteralValueExpr{
                Val:      cty.StringVal(str),
                SrcRange: next.Range,
            })
            nextCanTrimPrev = true

        case TokenTemplateInterp:
            // if the opener is ${~ then we want to eat any trailing whitespace
            // in the preceding literal token, assuming it is indeed a literal
            // token.
            if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 {
                prevExpr := parts[len(parts)-1]
                if lexpr, ok := prevExpr.(*LiteralValueExpr); ok {
                    val := lexpr.Val
                    if val.Type() == cty.String && val.IsKnown() && !val.IsNull() {
                        str := val.AsString()
                        str = strings.TrimRightFunc(str, unicode.IsSpace)
                        lexpr.Val = cty.StringVal(str)
                    }
                }
            }

            p.PushIncludeNewlines(false)
            expr, exprDiags := p.ParseExpression()
            diags = append(diags, exprDiags...)
            close := p.Peek()
            if close.Type != TokenTemplateSeqEnd {
                if !p.recovery {
                    diags = append(diags, &zcl.Diagnostic{
                        Severity: zcl.DiagError,
                        Summary:  "Extra characters after interpolation expression",
                        Detail:   "Expected a closing brace to end the interpolation expression, but found extra characters.",
                        Subject:  &close.Range,
                        Context:  zcl.RangeBetween(startRange, close.Range).Ptr(),
                    })
                }
                p.recover(TokenTemplateSeqEnd)
            } else {
                p.Read() // eat closing brace

                // If the closer is ~} then we want to eat any leading
                // whitespace on the next token, if it turns out to be a
                // literal token.
                if len(close.Bytes) == 2 && close.Bytes[0] == '~' {
                    ltrimNext = true
                }
            }
            p.PopIncludeNewlines()
            parts = append(parts, expr)
        case TokenTemplateControl:
            panic("template control sequences not yet supported")

        default:
            if !p.recovery {
                diags = append(diags, &zcl.Diagnostic{
                    Severity: zcl.DiagError,
                    Summary:  "Unterminated template string",
                    Detail:   "No closing marker was found for the string.",
                    Subject:  &next.Range,
                    Context:  zcl.RangeBetween(startRange, next.Range).Ptr(),
                })
            }
            p.recover(end)
            break Token
        }
    }

    if len(parts) == 0 {
        // If a sequence has no content, we'll treat it as if it had an
        // empty string in it because that's what the user probably means
        // if they write "" in configuration.
        return []Expression{
            &LiteralValueExpr{
                Val: cty.StringVal(""),
                SrcRange: zcl.Range{
                    Filename: startRange.Filename,
                    Start:    startRange.Start,
                    End:      startRange.Start,
                },
            },
        }, true, diags
    }

    return parts, len(parts) == 1, diags
}

// parseQuotedStringLiteral is a helper for parsing quoted strings that
// aren't allowed to contain any interpolations, such as block labels.
func (p *parser) parseQuotedStringLiteral() (string, zcl.Range, zcl.Diagnostics) {
@@ -1872,3 +1734,10 @@ func (p *parser) oppositeBracket(ty TokenType) TokenType {
        return TokenNil
    }
}

func errPlaceholderExpr(rng zcl.Range) Expression {
    return &LiteralValueExpr{
        Val:      cty.DynamicVal,
        SrcRange: rng,
    }
}
zcl/zclsyntax/parser_template.go (new file, 342 lines)
@@ -0,0 +1,342 @@
package zclsyntax

import (
    "fmt"
    "strings"
    "unicode"

    "github.com/zclconf/go-cty/cty"
    "github.com/zclconf/go-zcl/zcl"
)

func (p *parser) ParseTemplate() (Expression, zcl.Diagnostics) {
    return p.parseTemplate(TokenEOF)
}

func (p *parser) parseTemplate(end TokenType) (Expression, zcl.Diagnostics) {
    exprs, unwrap, rng, diags := p.parseTemplateInner(end)

    return &TemplateExpr{
        Parts:  exprs,
        Unwrap: unwrap,

        SrcRange: rng,
    }, diags
}

func (p *parser) parseTemplateInner(end TokenType) ([]Expression, bool, zcl.Range, zcl.Diagnostics) {
    parts, diags := p.parseTemplateParts(end)
    tp := templateParser{
        Tokens:   parts.Tokens,
        SrcRange: parts.SrcRange,
    }
    exprs, exprsDiags := tp.parseRoot()
    diags = append(diags, exprsDiags...)

    unwrap := false
    if len(parts.Tokens) == 2 { // one real token and one synthetic "end" token
        if _, isInterp := parts.Tokens[0].(*templateInterpToken); isInterp {
            unwrap = true
        }
    }

    return exprs, unwrap, parts.SrcRange, diags
}

type templateParser struct {
    Tokens   []templateToken
    SrcRange zcl.Range

    pos int
}

func (p *templateParser) parseRoot() ([]Expression, zcl.Diagnostics) {
    var exprs []Expression
    var diags zcl.Diagnostics

    for {
        next := p.Peek()
        if _, isEnd := next.(*templateEndToken); isEnd {
            break
        }

        expr, exprDiags := p.parseExpr()
        diags = append(diags, exprDiags...)
        exprs = append(exprs, expr)
    }

    return exprs, diags
}

func (p *templateParser) parseExpr() (Expression, zcl.Diagnostics) {
    next := p.Read()
    switch tok := next.(type) {

    case *templateLiteralToken:
        return &LiteralValueExpr{
            Val:      cty.StringVal(tok.Val),
            SrcRange: tok.SrcRange,
        }, nil

    case *templateInterpToken:
        return tok.Expr, nil

    case *templateIfToken:
        // TODO: implement
        panic("template if token not yet implemented")

    case *templateForToken:
        // TODO: implement
        panic("template for token not yet implemented")

    case *templateEndToken:
        return errPlaceholderExpr(tok.SrcRange), zcl.Diagnostics{
            {
                // This is a particularly unhelpful diagnostic, so callers
                // should attempt to pre-empt it and produce a more helpful
                // diagnostic that is context-aware.
                Severity: zcl.DiagError,
                Summary:  "Unexpected end of template",
                Detail:   "The control directives within this template are unbalanced.",
                Subject:  &tok.SrcRange,
            },
        }

    case *templateEndCtrlToken:
        return errPlaceholderExpr(tok.SrcRange), zcl.Diagnostics{
            {
                Severity: zcl.DiagError,
                Summary:  fmt.Sprintf("Unexpected %s directive", tok.Name()),
                Detail:   "The control directives within this template are unbalanced.",
                Subject:  &tok.SrcRange,
            },
        }

    default:
        // should never happen, because above should be exhaustive
        panic(fmt.Sprintf("unhandled template token type %T", next))
    }
}

func (p *templateParser) Peek() templateToken {
    return p.Tokens[p.pos]
}

func (p *templateParser) Read() templateToken {
    ret := p.Peek()
    if _, end := ret.(*templateEndToken); !end {
        p.pos++
    }
    return ret
}

// parseTemplateParts produces a flat sequence of "template tokens", which are
// either literal values (with any "trimming" already applied), interpolation
// sequences, or control flow markers.
//
// A further pass is required on the result to turn it into an AST.
func (p *parser) parseTemplateParts(end TokenType) (*templateParts, zcl.Diagnostics) {
    var parts []templateToken
    var diags zcl.Diagnostics

    startRange := p.NextRange()
    ltrimNext := false
    nextCanTrimPrev := false
    var endRange zcl.Range

Token:
    for {
        next := p.Read()
        if next.Type == end {
            // all done!
            endRange = next.Range
            break
        }

        ltrim := ltrimNext
        ltrimNext = false
        canTrimPrev := nextCanTrimPrev
        nextCanTrimPrev = false

        switch next.Type {
        case TokenStringLit, TokenQuotedLit:
            str, strDiags := p.decodeStringLit(next)
            diags = append(diags, strDiags...)

            if ltrim {
                str = strings.TrimLeftFunc(str, unicode.IsSpace)
            }

            parts = append(parts, &templateLiteralToken{
                Val:      str,
                SrcRange: next.Range,
            })
            nextCanTrimPrev = true

        case TokenTemplateInterp:
            // if the opener is ${~ then we want to eat any trailing whitespace
            // in the preceding literal token, assuming it is indeed a literal
            // token.
            if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 {
                prevExpr := parts[len(parts)-1]
                if lexpr, ok := prevExpr.(*templateLiteralToken); ok {
                    lexpr.Val = strings.TrimRightFunc(lexpr.Val, unicode.IsSpace)
                }
            }

            p.PushIncludeNewlines(false)
            expr, exprDiags := p.ParseExpression()
            diags = append(diags, exprDiags...)
            close := p.Peek()
            if close.Type != TokenTemplateSeqEnd {
                if !p.recovery {
                    diags = append(diags, &zcl.Diagnostic{
                        Severity: zcl.DiagError,
                        Summary:  "Extra characters after interpolation expression",
                        Detail:   "Expected a closing brace to end the interpolation expression, but found extra characters.",
                        Subject:  &close.Range,
                        Context:  zcl.RangeBetween(startRange, close.Range).Ptr(),
                    })
                }
                p.recover(TokenTemplateSeqEnd)
            } else {
                p.Read() // eat closing brace

                // If the closer is ~} then we want to eat any leading
                // whitespace on the next token, if it turns out to be a
                // literal token.
                if len(close.Bytes) == 2 && close.Bytes[0] == '~' {
                    ltrimNext = true
                }
            }
            p.PopIncludeNewlines()
            parts = append(parts, &templateInterpToken{
                Expr:     expr,
                SrcRange: zcl.RangeBetween(next.Range, close.Range),
            })
        case TokenTemplateControl:
            panic("template control sequences not yet supported")

        default:
            if !p.recovery {
                diags = append(diags, &zcl.Diagnostic{
                    Severity: zcl.DiagError,
                    Summary:  "Unterminated template string",
                    Detail:   "No closing marker was found for the string.",
                    Subject:  &next.Range,
                    Context:  zcl.RangeBetween(startRange, next.Range).Ptr(),
                })
            }
            final := p.recover(end)
            endRange = final.Range
            break Token
        }
    }

    if len(parts) == 0 {
        // If a sequence has no content, we'll treat it as if it had an
        // empty string in it because that's what the user probably means
        // if they write "" in configuration.
        parts = append(parts, &templateLiteralToken{
            Val: "",
            SrcRange: zcl.Range{
                // Range is the zero-character span immediately after the
                // opening quote.
                Filename: startRange.Filename,
                Start:    startRange.End,
                End:      startRange.End,
            },
        })
    }

    // Always end with an end token, so the parser can produce diagnostics
    // about unclosed items with proper position information.
    parts = append(parts, &templateEndToken{
        SrcRange: endRange,
    })

    ret := &templateParts{
        Tokens:   parts,
        SrcRange: zcl.RangeBetween(startRange, endRange),
    }

    return ret, diags
}

type templateParts struct {
    Tokens   []templateToken
    SrcRange zcl.Range
}

// templateToken is a higher-level token that represents a single atom within
// the template language. Our template parsing first raises the raw token
// stream to a sequence of templateToken, and then transforms the result into
// an expression tree.
type templateToken interface {
    templateToken() templateToken
}

type templateLiteralToken struct {
    Val      string
    SrcRange zcl.Range
    isTemplateToken
}

type templateInterpToken struct {
    Expr     Expression
    SrcRange zcl.Range
    isTemplateToken
}

type templateIfToken struct {
    CondExpr Expression
    SrcRange zcl.Range
    isTemplateToken
}

type templateForToken struct {
    KeyVar   string // empty if ignoring key
    ValVar   string
    CollExpr Expression
    SrcRange zcl.Range
    isTemplateToken
}

type templateEndCtrlType int

const (
    templateEndIf templateEndCtrlType = iota
    templateElse
    templateEndFor
)

type templateEndCtrlToken struct {
    Type     templateEndCtrlType
    SrcRange zcl.Range
    isTemplateToken
}

func (t *templateEndCtrlToken) Name() string {
    switch t.Type {
    case templateEndIf:
        return "endif"
    case templateElse:
        return "else"
    case templateEndFor:
        return "endfor"
    default:
        // should never happen
        panic("invalid templateEndCtrlType")
    }
}

type templateEndToken struct {
    SrcRange zcl.Range
    isTemplateToken
}

type isTemplateToken [0]int

func (t isTemplateToken) templateToken() templateToken {
    return t
}
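A side note on the isTemplateToken embedding above: each token struct embeds a zero-size marker type whose method satisfies the templateToken interface, so the interface is effectively closed to types in this package without writing a stub method on every struct. Below is a standalone sketch of the same pattern; the names (sealed, marker, literal, interp) are invented for the example, and the marker method is simplified to take no result, unlike the real templateToken() templateToken method.

package main

import "fmt"

// marker is zero-sized; embedding it gives a struct the sealed() method and
// therefore membership in the sealed interface, at no storage cost.
type sealed interface{ sealed() }

type marker [0]int

func (marker) sealed() {}

type literal struct {
    Val string
    marker
}

type interp struct {
    Expr string
    marker
}

func describe(t sealed) string {
    switch tok := t.(type) {
    case *literal:
        return fmt.Sprintf("literal %q", tok.Val)
    case *interp:
        return "interpolation of " + tok.Expr
    default:
        return "unknown token"
    }
}

func main() {
    fmt.Println(describe(&literal{Val: "hello "}))
    fmt.Println(describe(&interp{Expr: "name"}))
}

Because the marker is a zero-length array, embedding it adds nothing to the size of the token structs.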