From 2f1bfd284ce62bf1c99976eeac62b540d28af285 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Sat, 17 Jun 2017 09:05:15 -0700 Subject: [PATCH] zclsyntax: reorganize template parsing Previously we tried to do the whole template parse in one pass. This was adequate for dealing with literals and interpolations because they create a flat structure, but to parse the template control sequences we need to be able to deal with nested template sequences. As a building block towards this, we first do a pass of extracting the template-level "tokens": literals, interpolations, control sequences. We then pass over that sequence of tokens and parse it, which is then simplified because the larger template atoms have already been produced. --- zcl/zclsyntax/parser.go | 149 +------------- zcl/zclsyntax/parser_template.go | 342 +++++++++++++++++++++++++++++++ 2 files changed, 351 insertions(+), 140 deletions(-) create mode 100644 zcl/zclsyntax/parser_template.go diff --git a/zcl/zclsyntax/parser.go b/zcl/zclsyntax/parser.go index 0272617..8e25da7 100644 --- a/zcl/zclsyntax/parser.go +++ b/zcl/zclsyntax/parser.go @@ -4,8 +4,6 @@ import ( "bufio" "bytes" "fmt" - "strings" - "unicode" "github.com/apparentlymart/go-textseg/textseg" "github.com/zclconf/go-cty/cty" @@ -763,12 +761,12 @@ func (p *parser) parseExpressionTerm() (Expression, zcl.Diagnostics) { case TokenOQuote, TokenOHeredoc: open := p.Read() // eat opening marker closer := p.oppositeBracket(open.Type) - parts, unwrap, diags := p.parseTemplateParts(closer) + exprs, unwrap, _, diags := p.parseTemplateInner(closer) closeRange := p.PrevRange() return &TemplateExpr{ - Parts: parts, + Parts: exprs, Unwrap: unwrap, SrcRange: zcl.RangeBetween(open.Range, closeRange), @@ -1342,142 +1340,6 @@ func (p *parser) finishParsingForExpr(open Token) (Expression, zcl.Diagnostics) }, diags } -func (p *parser) ParseTemplate() (Expression, zcl.Diagnostics) { - startRange := p.NextRange() - parts, unwrap, diags := p.parseTemplateParts(TokenEOF) - endRange := p.PrevRange() - - return &TemplateExpr{ - Parts: parts, - Unwrap: unwrap, - - SrcRange: zcl.RangeBetween(startRange, endRange), - }, diags -} - -// parseTemplateParts parses the expressions that make up the content of a -// template, up to the given closing delimiter. It also returns a flag that -// is true if the first part should be returned as-is, or false if the -// full set of parts should be wrapped in a TemplateExpr to return. -// -// The wrapping is done separately by the caller so that any template -// delimiters can be included in the template's source range. -func (p *parser) parseTemplateParts(end TokenType) ([]Expression, bool, zcl.Diagnostics) { - var parts []Expression - var diags zcl.Diagnostics - - startRange := p.NextRange() - ltrimNext := false - nextCanTrimPrev := false - -Token: - for { - next := p.Read() - if next.Type == end { - // all done! - break - } - - ltrim := ltrimNext - ltrimNext = false - canTrimPrev := nextCanTrimPrev - nextCanTrimPrev = false - - switch next.Type { - case TokenStringLit, TokenQuotedLit: - str, strDiags := p.decodeStringLit(next) - diags = append(diags, strDiags...) - - if ltrim { - str = strings.TrimLeftFunc(str, unicode.IsSpace) - } - - parts = append(parts, &LiteralValueExpr{ - Val: cty.StringVal(str), - SrcRange: next.Range, - }) - nextCanTrimPrev = true - - case TokenTemplateInterp: - // if the opener is ${~ then we want to eat any trailing whitespace - // in the preceding literal token, assuming it is indeed a literal - // token. 
- if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 { - prevExpr := parts[len(parts)-1] - if lexpr, ok := prevExpr.(*LiteralValueExpr); ok { - val := lexpr.Val - if val.Type() == cty.String && val.IsKnown() && !val.IsNull() { - str := val.AsString() - str = strings.TrimRightFunc(str, unicode.IsSpace) - lexpr.Val = cty.StringVal(str) - } - } - } - - p.PushIncludeNewlines(false) - expr, exprDiags := p.ParseExpression() - diags = append(diags, exprDiags...) - close := p.Peek() - if close.Type != TokenTemplateSeqEnd { - if !p.recovery { - diags = append(diags, &zcl.Diagnostic{ - Severity: zcl.DiagError, - Summary: "Extra characters after interpolation expression", - Detail: "Expected a closing brace to end the interpolation expression, but found extra characters.", - Subject: &close.Range, - Context: zcl.RangeBetween(startRange, close.Range).Ptr(), - }) - } - p.recover(TokenTemplateSeqEnd) - } else { - p.Read() // eat closing brace - - // If the closer is ~} then we want to eat any leading - // whitespace on the next token, if it turns out to be a - // literal token. - if len(close.Bytes) == 2 && close.Bytes[0] == '~' { - ltrimNext = true - } - } - p.PopIncludeNewlines() - parts = append(parts, expr) - case TokenTemplateControl: - panic("template control sequences not yet supported") - - default: - if !p.recovery { - diags = append(diags, &zcl.Diagnostic{ - Severity: zcl.DiagError, - Summary: "Unterminated template string", - Detail: "No closing marker was found for the string.", - Subject: &next.Range, - Context: zcl.RangeBetween(startRange, next.Range).Ptr(), - }) - } - p.recover(end) - break Token - } - } - - if len(parts) == 0 { - // If a sequence has no content, we'll treat it as if it had an - // empty string in it because that's what the user probably means - // if they write "" in configuration. - return []Expression{ - &LiteralValueExpr{ - Val: cty.StringVal(""), - SrcRange: zcl.Range{ - Filename: startRange.Filename, - Start: startRange.Start, - End: startRange.Start, - }, - }, - }, true, diags - } - - return parts, len(parts) == 1, diags -} - // parseQuotedStringLiteral is a helper for parsing quoted strings that // aren't allowed to contain any interpolations, such as block labels. func (p *parser) parseQuotedStringLiteral() (string, zcl.Range, zcl.Diagnostics) { @@ -1872,3 +1734,10 @@ func (p *parser) oppositeBracket(ty TokenType) TokenType { return TokenNil } } + +func errPlaceholderExpr(rng zcl.Range) Expression { + return &LiteralValueExpr{ + Val: cty.DynamicVal, + SrcRange: rng, + } +} diff --git a/zcl/zclsyntax/parser_template.go b/zcl/zclsyntax/parser_template.go new file mode 100644 index 0000000..8f84176 --- /dev/null +++ b/zcl/zclsyntax/parser_template.go @@ -0,0 +1,342 @@ +package zclsyntax + +import ( + "fmt" + "strings" + "unicode" + + "github.com/zclconf/go-cty/cty" + "github.com/zclconf/go-zcl/zcl" +) + +func (p *parser) ParseTemplate() (Expression, zcl.Diagnostics) { + return p.parseTemplate(TokenEOF) +} + +func (p *parser) parseTemplate(end TokenType) (Expression, zcl.Diagnostics) { + exprs, unwrap, rng, diags := p.parseTemplateInner(end) + + return &TemplateExpr{ + Parts: exprs, + Unwrap: unwrap, + + SrcRange: rng, + }, diags +} + +func (p *parser) parseTemplateInner(end TokenType) ([]Expression, bool, zcl.Range, zcl.Diagnostics) { + parts, diags := p.parseTemplateParts(end) + tp := templateParser{ + Tokens: parts.Tokens, + SrcRange: parts.SrcRange, + } + exprs, exprsDiags := tp.parseRoot() + diags = append(diags, exprsDiags...) 
+ + unwrap := false + if len(parts.Tokens) == 2 { // one real token and one synthetic "end" token + if _, isInterp := parts.Tokens[0].(*templateInterpToken); isInterp { + unwrap = true + } + } + + return exprs, unwrap, parts.SrcRange, diags +} + +type templateParser struct { + Tokens []templateToken + SrcRange zcl.Range + + pos int +} + +func (p *templateParser) parseRoot() ([]Expression, zcl.Diagnostics) { + var exprs []Expression + var diags zcl.Diagnostics + + for { + next := p.Peek() + if _, isEnd := next.(*templateEndToken); isEnd { + break + } + + expr, exprDiags := p.parseExpr() + diags = append(diags, exprDiags...) + exprs = append(exprs, expr) + } + + return exprs, diags +} + +func (p *templateParser) parseExpr() (Expression, zcl.Diagnostics) { + next := p.Read() + switch tok := next.(type) { + + case *templateLiteralToken: + return &LiteralValueExpr{ + Val: cty.StringVal(tok.Val), + SrcRange: tok.SrcRange, + }, nil + + case *templateInterpToken: + return tok.Expr, nil + + case *templateIfToken: + // TODO: implement + panic("template if token not yet implemented") + + case *templateForToken: + // TODO: implement + panic("template for token not yet implemented") + + case *templateEndToken: + return errPlaceholderExpr(tok.SrcRange), zcl.Diagnostics{ + { + // This is a particularly unhelpful diagnostic, so callers + // should attempt to pre-empt it and produce a more helpful + // diagnostic that is context-aware. + Severity: zcl.DiagError, + Summary: "Unexpected end of template", + Detail: "The control directives within this template are unbalanced.", + Subject: &tok.SrcRange, + }, + } + + case *templateEndCtrlToken: + return errPlaceholderExpr(tok.SrcRange), zcl.Diagnostics{ + { + Severity: zcl.DiagError, + Summary: fmt.Sprintf("Unexpected %s directive", tok.Name()), + Detail: "The control directives within this template are unbalanced.", + Subject: &tok.SrcRange, + }, + } + + default: + // should never happen, because above should be exhaustive + panic(fmt.Sprintf("unhandled template token type %T", next)) + } +} + +func (p *templateParser) Peek() templateToken { + return p.Tokens[p.pos] +} + +func (p *templateParser) Read() templateToken { + ret := p.Peek() + if _, end := ret.(*templateEndToken); !end { + p.pos++ + } + return ret +} + +// parseTemplateParts produces a flat sequence of "template tokens", which are +// either literal values (with any "trimming" already applied), interpolation +// sequences, or control flow markers. +// +// A further pass is required on the result to turn it into an AST. +func (p *parser) parseTemplateParts(end TokenType) (*templateParts, zcl.Diagnostics) { + var parts []templateToken + var diags zcl.Diagnostics + + startRange := p.NextRange() + ltrimNext := false + nextCanTrimPrev := false + var endRange zcl.Range + +Token: + for { + next := p.Read() + if next.Type == end { + // all done! + endRange = next.Range + break + } + + ltrim := ltrimNext + ltrimNext = false + canTrimPrev := nextCanTrimPrev + nextCanTrimPrev = false + + switch next.Type { + case TokenStringLit, TokenQuotedLit: + str, strDiags := p.decodeStringLit(next) + diags = append(diags, strDiags...) + + if ltrim { + str = strings.TrimLeftFunc(str, unicode.IsSpace) + } + + parts = append(parts, &templateLiteralToken{ + Val: str, + SrcRange: next.Range, + }) + nextCanTrimPrev = true + + case TokenTemplateInterp: + // if the opener is ${~ then we want to eat any trailing whitespace + // in the preceding literal token, assuming it is indeed a literal + // token. 
+ if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 { + prevExpr := parts[len(parts)-1] + if lexpr, ok := prevExpr.(*templateLiteralToken); ok { + lexpr.Val = strings.TrimRightFunc(lexpr.Val, unicode.IsSpace) + } + } + + p.PushIncludeNewlines(false) + expr, exprDiags := p.ParseExpression() + diags = append(diags, exprDiags...) + close := p.Peek() + if close.Type != TokenTemplateSeqEnd { + if !p.recovery { + diags = append(diags, &zcl.Diagnostic{ + Severity: zcl.DiagError, + Summary: "Extra characters after interpolation expression", + Detail: "Expected a closing brace to end the interpolation expression, but found extra characters.", + Subject: &close.Range, + Context: zcl.RangeBetween(startRange, close.Range).Ptr(), + }) + } + p.recover(TokenTemplateSeqEnd) + } else { + p.Read() // eat closing brace + + // If the closer is ~} then we want to eat any leading + // whitespace on the next token, if it turns out to be a + // literal token. + if len(close.Bytes) == 2 && close.Bytes[0] == '~' { + ltrimNext = true + } + } + p.PopIncludeNewlines() + parts = append(parts, &templateInterpToken{ + Expr: expr, + SrcRange: zcl.RangeBetween(next.Range, close.Range), + }) + case TokenTemplateControl: + panic("template control sequences not yet supported") + + default: + if !p.recovery { + diags = append(diags, &zcl.Diagnostic{ + Severity: zcl.DiagError, + Summary: "Unterminated template string", + Detail: "No closing marker was found for the string.", + Subject: &next.Range, + Context: zcl.RangeBetween(startRange, next.Range).Ptr(), + }) + } + final := p.recover(end) + endRange = final.Range + break Token + } + } + + if len(parts) == 0 { + // If a sequence has no content, we'll treat it as if it had an + // empty string in it because that's what the user probably means + // if they write "" in configuration. + parts = append(parts, &templateLiteralToken{ + Val: "", + SrcRange: zcl.Range{ + // Range is the zero-character span immediately after the + // opening quote. + Filename: startRange.Filename, + Start: startRange.End, + End: startRange.End, + }, + }) + } + + // Always end with an end token, so the parser can produce diagnostics + // about unclosed items with proper position information. + parts = append(parts, &templateEndToken{ + SrcRange: endRange, + }) + + ret := &templateParts{ + Tokens: parts, + SrcRange: zcl.RangeBetween(startRange, endRange), + } + + return ret, diags +} + +type templateParts struct { + Tokens []templateToken + SrcRange zcl.Range +} + +// templateToken is a higher-level token that represents a single atom within +// the template language. Our template parsing first raises the raw token +// stream to a sequence of templateToken, and then transforms the result into +// an expression tree. 
+type templateToken interface { + templateToken() templateToken +} + +type templateLiteralToken struct { + Val string + SrcRange zcl.Range + isTemplateToken +} + +type templateInterpToken struct { + Expr Expression + SrcRange zcl.Range + isTemplateToken +} + +type templateIfToken struct { + CondExpr Expression + SrcRange zcl.Range + isTemplateToken +} + +type templateForToken struct { + KeyVar string // empty if ignoring key + ValVar string + CollExpr Expression + SrcRange zcl.Range + isTemplateToken +} + +type templateEndCtrlType int + +const ( + templateEndIf templateEndCtrlType = iota + templateElse + templateEndFor +) + +type templateEndCtrlToken struct { + Type templateEndCtrlType + SrcRange zcl.Range + isTemplateToken +} + +func (t *templateEndCtrlToken) Name() string { + switch t.Type { + case templateEndIf: + return "endif" + case templateElse: + return "else" + case templateEndFor: + return "endfor" + default: + // should never happen + panic("invalid templateEndCtrlType") + } +} + +type templateEndToken struct { + SrcRange zcl.Range + isTemplateToken +} + +type isTemplateToken [0]int + +func (t isTemplateToken) templateToken() templateToken { + return t +}
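
As an illustration of the two-pass approach described in the commit message: the first pass (parseTemplateParts) flattens the template into template-level tokens, and the second pass (templateParser) walks those tokens to build the expression parts. The sketch below only mirrors that shape, using simplified stand-in types and names (tmplToken, raise, parse, and so on); it is not the zclsyntax API, and it ignores escapes, heredocs, "~" whitespace trimming, control sequences, and diagnostics.

package main

import (
	"fmt"
	"strings"
)

// Pass 1 output: flat template-level tokens, analogous in spirit to
// templateLiteralToken, templateInterpToken and templateEndToken.
type tmplToken interface{ tmplToken() }

type litToken struct{ val string }
type interpToken struct{ expr string }
type endToken struct{}

func (litToken) tmplToken()    {}
func (interpToken) tmplToken() {}
func (endToken) tmplToken()    {}

// raise is the first pass: it flattens a well-formed template string into
// tokens. (The real parseTemplateParts works on lexer tokens and also
// decodes escapes and applies "~" whitespace trimming at this stage.)
func raise(src string) []tmplToken {
	var toks []tmplToken
	for len(src) > 0 {
		start := strings.Index(src, "${")
		if start < 0 {
			toks = append(toks, litToken{src})
			break
		}
		if start > 0 {
			toks = append(toks, litToken{src[:start]})
		}
		// assumes a closing brace exists; the real parser recovers and
		// emits a diagnostic instead
		closeIdx := start + strings.Index(src[start:], "}")
		toks = append(toks, interpToken{src[start+2 : closeIdx]})
		src = src[closeIdx+1:]
	}
	// Always terminate with a synthetic end token so the second pass can
	// report unbalanced directives with a real source position.
	return append(toks, endToken{})
}

// parse is the second pass: it walks the flat tokens and produces the parts
// of the template (rendered as strings here rather than Expression values).
func parse(toks []tmplToken) []string {
	var parts []string
	for _, tok := range toks {
		switch tok := tok.(type) {
		case litToken:
			parts = append(parts, fmt.Sprintf("lit(%q)", tok.val))
		case interpToken:
			parts = append(parts, fmt.Sprintf("interp(%s)", tok.expr))
		case endToken:
			return parts
		}
	}
	return parts
}

func main() {
	fmt.Println(parse(raise("hello, ${name}!")))
	// Prints: [lit("hello, ") interp(name) lit("!")]
}

One behavioral nuance visible in the diff itself: the old parseTemplateParts returned unwrap == true for any single-part template, including a lone literal, whereas parseTemplateInner now sets unwrap only when the single real token (ignoring the synthetic end token) is an interpolation.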