From 2f1bfd284ce62bf1c99976eeac62b540d28af285 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Sat, 17 Jun 2017 09:05:15 -0700 Subject: [PATCH] zclsyntax: reorganize template parsing Previously we tried to do the whole template parse in one pass. This was adequate for dealing with literals and interpolations because they create a flat structure, but to parse the template control sequences we need to be able to deal with nested template sequences. As a building block towards this, we first do a pass of extracting the template-level "tokens": literals, interpolations, control sequences. We then pass over that sequence of tokens and parse it, which is then simplified because the larger template atoms have already been produced. --- zcl/zclsyntax/parser.go | 149 +------------- zcl/zclsyntax/parser_template.go | 342 +++++++++++++++++++++++++++++++ 2 files changed, 351 insertions(+), 140 deletions(-) create mode 100644 zcl/zclsyntax/parser_template.go diff --git a/zcl/zclsyntax/parser.go b/zcl/zclsyntax/parser.go index 0272617..8e25da7 100644 --- a/zcl/zclsyntax/parser.go +++ b/zcl/zclsyntax/parser.go @@ -4,8 +4,6 @@ import ( "bufio" "bytes" "fmt" - "strings" - "unicode" "github.com/apparentlymart/go-textseg/textseg" "github.com/zclconf/go-cty/cty" @@ -763,12 +761,12 @@ func (p *parser) parseExpressionTerm() (Expression, zcl.Diagnostics) { case TokenOQuote, TokenOHeredoc: open := p.Read() // eat opening marker closer := p.oppositeBracket(open.Type) - parts, unwrap, diags := p.parseTemplateParts(closer) + exprs, unwrap, _, diags := p.parseTemplateInner(closer) closeRange := p.PrevRange() return &TemplateExpr{ - Parts: parts, + Parts: exprs, Unwrap: unwrap, SrcRange: zcl.RangeBetween(open.Range, closeRange), @@ -1342,142 +1340,6 @@ func (p *parser) finishParsingForExpr(open Token) (Expression, zcl.Diagnostics) }, diags } -func (p *parser) ParseTemplate() (Expression, zcl.Diagnostics) { - startRange := p.NextRange() - parts, unwrap, diags := p.parseTemplateParts(TokenEOF) - endRange := p.PrevRange() - - return &TemplateExpr{ - Parts: parts, - Unwrap: unwrap, - - SrcRange: zcl.RangeBetween(startRange, endRange), - }, diags -} - -// parseTemplateParts parses the expressions that make up the content of a -// template, up to the given closing delimiter. It also returns a flag that -// is true if the first part should be returned as-is, or false if the -// full set of parts should be wrapped in a TemplateExpr to return. -// -// The wrapping is done separately by the caller so that any template -// delimiters can be included in the template's source range. -func (p *parser) parseTemplateParts(end TokenType) ([]Expression, bool, zcl.Diagnostics) { - var parts []Expression - var diags zcl.Diagnostics - - startRange := p.NextRange() - ltrimNext := false - nextCanTrimPrev := false - -Token: - for { - next := p.Read() - if next.Type == end { - // all done! - break - } - - ltrim := ltrimNext - ltrimNext = false - canTrimPrev := nextCanTrimPrev - nextCanTrimPrev = false - - switch next.Type { - case TokenStringLit, TokenQuotedLit: - str, strDiags := p.decodeStringLit(next) - diags = append(diags, strDiags...) - - if ltrim { - str = strings.TrimLeftFunc(str, unicode.IsSpace) - } - - parts = append(parts, &LiteralValueExpr{ - Val: cty.StringVal(str), - SrcRange: next.Range, - }) - nextCanTrimPrev = true - - case TokenTemplateInterp: - // if the opener is ${~ then we want to eat any trailing whitespace - // in the preceding literal token, assuming it is indeed a literal - // token. 
- if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 { - prevExpr := parts[len(parts)-1] - if lexpr, ok := prevExpr.(*LiteralValueExpr); ok { - val := lexpr.Val - if val.Type() == cty.String && val.IsKnown() && !val.IsNull() { - str := val.AsString() - str = strings.TrimRightFunc(str, unicode.IsSpace) - lexpr.Val = cty.StringVal(str) - } - } - } - - p.PushIncludeNewlines(false) - expr, exprDiags := p.ParseExpression() - diags = append(diags, exprDiags...) - close := p.Peek() - if close.Type != TokenTemplateSeqEnd { - if !p.recovery { - diags = append(diags, &zcl.Diagnostic{ - Severity: zcl.DiagError, - Summary: "Extra characters after interpolation expression", - Detail: "Expected a closing brace to end the interpolation expression, but found extra characters.", - Subject: &close.Range, - Context: zcl.RangeBetween(startRange, close.Range).Ptr(), - }) - } - p.recover(TokenTemplateSeqEnd) - } else { - p.Read() // eat closing brace - - // If the closer is ~} then we want to eat any leading - // whitespace on the next token, if it turns out to be a - // literal token. - if len(close.Bytes) == 2 && close.Bytes[0] == '~' { - ltrimNext = true - } - } - p.PopIncludeNewlines() - parts = append(parts, expr) - case TokenTemplateControl: - panic("template control sequences not yet supported") - - default: - if !p.recovery { - diags = append(diags, &zcl.Diagnostic{ - Severity: zcl.DiagError, - Summary: "Unterminated template string", - Detail: "No closing marker was found for the string.", - Subject: &next.Range, - Context: zcl.RangeBetween(startRange, next.Range).Ptr(), - }) - } - p.recover(end) - break Token - } - } - - if len(parts) == 0 { - // If a sequence has no content, we'll treat it as if it had an - // empty string in it because that's what the user probably means - // if they write "" in configuration. - return []Expression{ - &LiteralValueExpr{ - Val: cty.StringVal(""), - SrcRange: zcl.Range{ - Filename: startRange.Filename, - Start: startRange.Start, - End: startRange.Start, - }, - }, - }, true, diags - } - - return parts, len(parts) == 1, diags -} - // parseQuotedStringLiteral is a helper for parsing quoted strings that // aren't allowed to contain any interpolations, such as block labels. func (p *parser) parseQuotedStringLiteral() (string, zcl.Range, zcl.Diagnostics) { @@ -1872,3 +1734,10 @@ func (p *parser) oppositeBracket(ty TokenType) TokenType { return TokenNil } } + +func errPlaceholderExpr(rng zcl.Range) Expression { + return &LiteralValueExpr{ + Val: cty.DynamicVal, + SrcRange: rng, + } +} diff --git a/zcl/zclsyntax/parser_template.go b/zcl/zclsyntax/parser_template.go new file mode 100644 index 0000000..8f84176 --- /dev/null +++ b/zcl/zclsyntax/parser_template.go @@ -0,0 +1,342 @@ +package zclsyntax + +import ( + "fmt" + "strings" + "unicode" + + "github.com/zclconf/go-cty/cty" + "github.com/zclconf/go-zcl/zcl" +) + +func (p *parser) ParseTemplate() (Expression, zcl.Diagnostics) { + return p.parseTemplate(TokenEOF) +} + +func (p *parser) parseTemplate(end TokenType) (Expression, zcl.Diagnostics) { + exprs, unwrap, rng, diags := p.parseTemplateInner(end) + + return &TemplateExpr{ + Parts: exprs, + Unwrap: unwrap, + + SrcRange: rng, + }, diags +} + +func (p *parser) parseTemplateInner(end TokenType) ([]Expression, bool, zcl.Range, zcl.Diagnostics) { + parts, diags := p.parseTemplateParts(end) + tp := templateParser{ + Tokens: parts.Tokens, + SrcRange: parts.SrcRange, + } + exprs, exprsDiags := tp.parseRoot() + diags = append(diags, exprsDiags...) 
+ + unwrap := false + if len(parts.Tokens) == 2 { // one real token and one synthetic "end" token + if _, isInterp := parts.Tokens[0].(*templateInterpToken); isInterp { + unwrap = true + } + } + + return exprs, unwrap, parts.SrcRange, diags +} + +type templateParser struct { + Tokens []templateToken + SrcRange zcl.Range + + pos int +} + +func (p *templateParser) parseRoot() ([]Expression, zcl.Diagnostics) { + var exprs []Expression + var diags zcl.Diagnostics + + for { + next := p.Peek() + if _, isEnd := next.(*templateEndToken); isEnd { + break + } + + expr, exprDiags := p.parseExpr() + diags = append(diags, exprDiags...) + exprs = append(exprs, expr) + } + + return exprs, diags +} + +func (p *templateParser) parseExpr() (Expression, zcl.Diagnostics) { + next := p.Read() + switch tok := next.(type) { + + case *templateLiteralToken: + return &LiteralValueExpr{ + Val: cty.StringVal(tok.Val), + SrcRange: tok.SrcRange, + }, nil + + case *templateInterpToken: + return tok.Expr, nil + + case *templateIfToken: + // TODO: implement + panic("template if token not yet implemented") + + case *templateForToken: + // TODO: implement + panic("template for token not yet implemented") + + case *templateEndToken: + return errPlaceholderExpr(tok.SrcRange), zcl.Diagnostics{ + { + // This is a particularly unhelpful diagnostic, so callers + // should attempt to pre-empt it and produce a more helpful + // diagnostic that is context-aware. + Severity: zcl.DiagError, + Summary: "Unexpected end of template", + Detail: "The control directives within this template are unbalanced.", + Subject: &tok.SrcRange, + }, + } + + case *templateEndCtrlToken: + return errPlaceholderExpr(tok.SrcRange), zcl.Diagnostics{ + { + Severity: zcl.DiagError, + Summary: fmt.Sprintf("Unexpected %s directive", tok.Name()), + Detail: "The control directives within this template are unbalanced.", + Subject: &tok.SrcRange, + }, + } + + default: + // should never happen, because above should be exhaustive + panic(fmt.Sprintf("unhandled template token type %T", next)) + } +} + +func (p *templateParser) Peek() templateToken { + return p.Tokens[p.pos] +} + +func (p *templateParser) Read() templateToken { + ret := p.Peek() + if _, end := ret.(*templateEndToken); !end { + p.pos++ + } + return ret +} + +// parseTemplateParts produces a flat sequence of "template tokens", which are +// either literal values (with any "trimming" already applied), interpolation +// sequences, or control flow markers. +// +// A further pass is required on the result to turn it into an AST. +func (p *parser) parseTemplateParts(end TokenType) (*templateParts, zcl.Diagnostics) { + var parts []templateToken + var diags zcl.Diagnostics + + startRange := p.NextRange() + ltrimNext := false + nextCanTrimPrev := false + var endRange zcl.Range + +Token: + for { + next := p.Read() + if next.Type == end { + // all done! + endRange = next.Range + break + } + + ltrim := ltrimNext + ltrimNext = false + canTrimPrev := nextCanTrimPrev + nextCanTrimPrev = false + + switch next.Type { + case TokenStringLit, TokenQuotedLit: + str, strDiags := p.decodeStringLit(next) + diags = append(diags, strDiags...) + + if ltrim { + str = strings.TrimLeftFunc(str, unicode.IsSpace) + } + + parts = append(parts, &templateLiteralToken{ + Val: str, + SrcRange: next.Range, + }) + nextCanTrimPrev = true + + case TokenTemplateInterp: + // if the opener is ${~ then we want to eat any trailing whitespace + // in the preceding literal token, assuming it is indeed a literal + // token. 
+ if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 { + prevExpr := parts[len(parts)-1] + if lexpr, ok := prevExpr.(*templateLiteralToken); ok { + lexpr.Val = strings.TrimRightFunc(lexpr.Val, unicode.IsSpace) + } + } + + p.PushIncludeNewlines(false) + expr, exprDiags := p.ParseExpression() + diags = append(diags, exprDiags...) + close := p.Peek() + if close.Type != TokenTemplateSeqEnd { + if !p.recovery { + diags = append(diags, &zcl.Diagnostic{ + Severity: zcl.DiagError, + Summary: "Extra characters after interpolation expression", + Detail: "Expected a closing brace to end the interpolation expression, but found extra characters.", + Subject: &close.Range, + Context: zcl.RangeBetween(startRange, close.Range).Ptr(), + }) + } + p.recover(TokenTemplateSeqEnd) + } else { + p.Read() // eat closing brace + + // If the closer is ~} then we want to eat any leading + // whitespace on the next token, if it turns out to be a + // literal token. + if len(close.Bytes) == 2 && close.Bytes[0] == '~' { + ltrimNext = true + } + } + p.PopIncludeNewlines() + parts = append(parts, &templateInterpToken{ + Expr: expr, + SrcRange: zcl.RangeBetween(next.Range, close.Range), + }) + case TokenTemplateControl: + panic("template control sequences not yet supported") + + default: + if !p.recovery { + diags = append(diags, &zcl.Diagnostic{ + Severity: zcl.DiagError, + Summary: "Unterminated template string", + Detail: "No closing marker was found for the string.", + Subject: &next.Range, + Context: zcl.RangeBetween(startRange, next.Range).Ptr(), + }) + } + final := p.recover(end) + endRange = final.Range + break Token + } + } + + if len(parts) == 0 { + // If a sequence has no content, we'll treat it as if it had an + // empty string in it because that's what the user probably means + // if they write "" in configuration. + parts = append(parts, &templateLiteralToken{ + Val: "", + SrcRange: zcl.Range{ + // Range is the zero-character span immediately after the + // opening quote. + Filename: startRange.Filename, + Start: startRange.End, + End: startRange.End, + }, + }) + } + + // Always end with an end token, so the parser can produce diagnostics + // about unclosed items with proper position information. + parts = append(parts, &templateEndToken{ + SrcRange: endRange, + }) + + ret := &templateParts{ + Tokens: parts, + SrcRange: zcl.RangeBetween(startRange, endRange), + } + + return ret, diags +} + +type templateParts struct { + Tokens []templateToken + SrcRange zcl.Range +} + +// templateToken is a higher-level token that represents a single atom within +// the template language. Our template parsing first raises the raw token +// stream to a sequence of templateToken, and then transforms the result into +// an expression tree. 
+type templateToken interface { + templateToken() templateToken +} + +type templateLiteralToken struct { + Val string + SrcRange zcl.Range + isTemplateToken +} + +type templateInterpToken struct { + Expr Expression + SrcRange zcl.Range + isTemplateToken +} + +type templateIfToken struct { + CondExpr Expression + SrcRange zcl.Range + isTemplateToken +} + +type templateForToken struct { + KeyVar string // empty if ignoring key + ValVar string + CollExpr Expression + SrcRange zcl.Range + isTemplateToken +} + +type templateEndCtrlType int + +const ( + templateEndIf templateEndCtrlType = iota + templateElse + templateEndFor +) + +type templateEndCtrlToken struct { + Type templateEndCtrlType + SrcRange zcl.Range + isTemplateToken +} + +func (t *templateEndCtrlToken) Name() string { + switch t.Type { + case templateEndIf: + return "endif" + case templateElse: + return "else" + case templateEndFor: + return "endfor" + default: + // should never happen + panic("invalid templateEndCtrlType") + } +} + +type templateEndToken struct { + SrcRange zcl.Range + isTemplateToken +} + +type isTemplateToken [0]int + +func (t isTemplateToken) templateToken() templateToken { + return t +}
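
As an illustration of the two-pass approach described in the commit message: the first pass (parseTemplateParts) flattens the template into template-level tokens, and the second pass (templateParser) walks those tokens to build the expression parts. The sketch below only mirrors that shape, using simplified stand-in types and names (tmplToken, raise, parse, and so on); it is not the zclsyntax API, and it ignores escapes, heredocs, "~" whitespace trimming, control sequences, and diagnostics.

package main

import (
	"fmt"
	"strings"
)

// Pass 1 output: flat template-level tokens, analogous in spirit to
// templateLiteralToken, templateInterpToken and templateEndToken.
type tmplToken interface{ tmplToken() }

type litToken struct{ val string }
type interpToken struct{ expr string }
type endToken struct{}

func (litToken) tmplToken()    {}
func (interpToken) tmplToken() {}
func (endToken) tmplToken()    {}

// raise is the first pass: it flattens a well-formed template string into
// tokens. (The real parseTemplateParts works on lexer tokens and also
// decodes escapes and applies "~" whitespace trimming at this stage.)
func raise(src string) []tmplToken {
	var toks []tmplToken
	for len(src) > 0 {
		start := strings.Index(src, "${")
		if start < 0 {
			toks = append(toks, litToken{src})
			break
		}
		if start > 0 {
			toks = append(toks, litToken{src[:start]})
		}
		// assumes a closing brace exists; the real parser recovers and
		// emits a diagnostic instead
		closeIdx := start + strings.Index(src[start:], "}")
		toks = append(toks, interpToken{src[start+2 : closeIdx]})
		src = src[closeIdx+1:]
	}
	// Always terminate with a synthetic end token so the second pass can
	// report unbalanced directives with a real source position.
	return append(toks, endToken{})
}

// parse is the second pass: it walks the flat tokens and produces the parts
// of the template (rendered as strings here rather than Expression values).
func parse(toks []tmplToken) []string {
	var parts []string
	for _, tok := range toks {
		switch tok := tok.(type) {
		case litToken:
			parts = append(parts, fmt.Sprintf("lit(%q)", tok.val))
		case interpToken:
			parts = append(parts, fmt.Sprintf("interp(%s)", tok.expr))
		case endToken:
			return parts
		}
	}
	return parts
}

func main() {
	fmt.Println(parse(raise("hello, ${name}!")))
	// Prints: [lit("hello, ") interp(name) lit("!")]
}

One behavioral nuance visible in the diff itself: the old parseTemplateParts returned unwrap == true for any single-part template, including a lone literal, whereas parseTemplateInner now sets unwrap only when the single real token (ignoring the synthetic end token) is an interpolation.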