zclsyntax: decode both quoted and unquoted string literals

This rewrite of decodeQuotedLit, now called decodeStringLit, handles both quoted and unquoted string literals with a single function. It also now correctly handles "$$" and "!!" sequences that are not immediately followed by a "{" symbol; such sequences are not escapes and must therefore be treated literally.
2017-05-31 07:16:32 -07:00 · 2017-05-31 07:16:32 -07:00 · d003da232e
commit d003da232e
parent f8bdda5ab9
2 changed files with 121 additions and 47 deletions
--- a/zcl/zclsyntax/parser.go
+++ b/zcl/zclsyntax/parser.go
@ -279,7 +279,7 @@ Token:
 			break Token
 		case TokenQuotedLit:
-			s, sDiags := p.decodeQuotedLit(tok)
+			s, sDiags := p.decodeStringLit(tok)
 			diags = append(diags, sDiags...)
 			ret.WriteString(s)
@ -330,45 +330,93 @@ Token:
 	return ret.String(), zcl.RangeBetween(oQuote.Range, cQuote.Range), diags
 }
-// decodeQuotedLit processes the given TokenQuotedLit token as if it were
+// decodeStringLit processes the given token, which must be either a
-// a string literal appearing in quotes, returning the string resulting from
+// TokenQuotedLit or a TokenStringLit, returning the string resulting from
 // resolving any escape sequences.
 //
 // If any error diagnostics are returned, the returned string may be incomplete
 // or otherwise invalid.
-func (p *parser) decodeQuotedLit(tok Token) (string, zcl.Diagnostics) {
+func (p *parser) decodeStringLit(tok Token) (string, zcl.Diagnostics) {
-	if tok.Type != TokenQuotedLit {
+	var quoted bool
-		panic("decodeQuotedLit can only be used with TokenQuotedLit tokens")
+	switch tok.Type {
 	case TokenQuotedLit:
 		quoted = true
 	case TokenStringLit:
 		quoted = false
 	default:
 		panic("decodeQuotedLit can only be used with TokenStringLit and TokenQuotedLit tokens")
 	}
 	var diags zcl.Diagnostics
 	ret := make([]byte, 0, len(tok.Bytes))
 	var esc []byte
 	sc := bufio.NewScanner(bytes.NewReader(tok.Bytes))
 	sc.Split(textseg.ScanGraphemeClusters)
 	escaping := rune(0)
 	pos := tok.Range.Start
 	newPos := pos
 Character:
 	for sc.Scan() {
-		switch escaping {
+		pos = newPos
-		case '\\':
+		ch := sc.Bytes()
 			escaping = 0
 			ty := sc.Text()
 			switch ty {
 			case "n":
 				ret = append(ret, 10)
 			case "r":
 				ret = append(ret, 13)
 			case "t":
 				ret = append(ret, 9)
-			// TODO: numeric character escapes with \uXXXX
+		// Adjust position based on our new character.
 		// \r\n is considered to be a single character in text segmentation,
 		// so a line break is either a lone "\n" or a two-byte cluster ending in "\n".
 		if (len(ch) == 1 && ch[0] == '\n') || (len(ch) == 2 && ch[1] == '\n') {
 			newPos.Line++
 			newPos.Column = 0
 		} else {
 			newPos.Column++
 		}
 		newPos.Byte += len(ch)
 		if len(esc) > 0 {
 			switch esc[0] {
 			case '\\':
 				if len(ch) == 1 {
 					switch ch[0] {
 					// TODO: numeric character escapes with \uXXXX
 					case 'n':
 						ret = append(ret, '\n')
 						esc = esc[:0]
 						continue Character
 					case 'r':
 						ret = append(ret, '\r')
 						esc = esc[:0]
 						continue Character
 					case 't':
 						ret = append(ret, '\t')
 						esc = esc[:0]
 						continue Character
 					case '"':
 						ret = append(ret, '"')
 						esc = esc[:0]
 						continue Character
 					case '\\':
 						ret = append(ret, '\\')
 						esc = esc[:0]
 						continue Character
 					}
 				}
 				var detail string
 				switch {
 				case len(ch) == 1 && (ch[0] == '$' || ch[0] == '!'):
 					detail = fmt.Sprintf(
 						"The characters \"\\%s\" do not form a recognized escape sequence. To escape a \"%s{\" template sequence, use \"%s%s{\".",
 						ch, ch, ch, ch,
 					)
 				default:
 					detail = fmt.Sprintf("The characters \"\\%s\" do not form a recognized escape sequence.", ch)
 				}
 			default:
 				diags = append(diags, &zcl.Diagnostic{
 					Severity: zcl.DiagError,
 					Summary:  "Invalid escape sequence",
-					Detail:   fmt.Sprintf("The sequence \"\\%s\" is not a recognized escape sequence.", ty),
+					Detail:   detail,
 					Subject: &zcl.Range{
 						Filename: tok.Range.Filename,
 						Start: zcl.Pos{
@ -379,35 +427,61 @@ func (p *parser) decodeQuotedLit(tok Token) (string, zcl.Diagnostics) {
 						End: zcl.Pos{
 							Line:   pos.Line,
 							Column: pos.Column + 1, // safe because we know the previous character must be a backslash
-							Byte:   pos.Byte + len(ty),
+							Byte:   pos.Byte + len(ch),
 						},
 					},
 				})
-				ret = append(ret, sc.Bytes()...)
+				ret = append(ret, ch...)
-			}
+				esc = esc[:0]
-		case '$', '!':
+				continue Character
 			bytes := sc.Bytes()
 			if len(bytes) != 1 || bytes[0] == byte(escaping) {
 				ret = append(ret, byte(escaping))
 			}
 			ret = append(ret, bytes...)
 		default:
 			switch sc.Text() {
 			case "\\":
 				escaping = '\\'
 			case "$":
 				escaping = '$'
 			case "!":
 				escaping = '!'
 			default:
 				ret = append(ret, sc.Bytes()...)
 			}
 		}
-		// Literal newlines cannot appear in quoted literals, so it's safe
+			case '$', '!':
-		// to just increment Column and Byte in our position.
+				switch len(esc) {
-		pos.Column++
+				case 1:
-		pos.Byte += len(sc.Bytes())
+					if len(ch) == 1 && ch[0] == esc[0] {
 						esc = append(esc, ch[0])
 						continue Character
 					}
 					// Any other character means this wasn't an escape sequence
 					// after all.
 					ret = append(ret, esc...)
 					ret = append(ret, ch...)
 					esc = esc[:0]
 				case 2:
 					if len(ch) == 1 && ch[0] == '{' {
 						// successful escape sequence
 						ret = append(ret, esc[0])
 					} else {
 						// not an escape sequence, so just output literal
 						ret = append(ret, esc...)
 					}
 					ret = append(ret, ch...)
 					esc = esc[:0]
 				default:
 					// should never happen
 					panic("have invalid escape sequence >2 characters")
 				}
 			}
 		} else {
 			if len(ch) == 1 {
 				switch ch[0] {
 				case '\\':
 					if quoted { // ignore backslashes in unquoted mode
 						esc = append(esc, '\\')
 						continue Character
 					}
 				case '$':
 					esc = append(esc, '$')
 					continue Character
 				case '!':
 					esc = append(esc, '!')
 					continue Character
 				}
 			}
 			ret = append(ret, ch...)
 		}
 	}
 	return string(ret), diags
--- a/zcl/zclsyntax/parser_test.go
+++ b/zcl/zclsyntax/parser_test.go
@ -221,7 +221,7 @@ block "valid" {}
 		},
 		{
 			`block "f\o" {}`,
-			1, // \o is not a valid escape sequence
+			1, // "\o" is not a valid escape sequence
 			&Body{
 				Attributes: Attributes{},
 				Blocks: Blocks{