hclsyntax: fix end-of-string edge cases for $ and % escapes
We recognize and allow naked $ and % sequences by reading ahead one more character to see if it's a "{" that would introduce an interpolation or control sequence. Unfortunately this is problematic in the end condition because it can "eat" the terminating character and cause the scanner to continue parsing a template when the user intended the template to end.

Handling this is a bit messy. For the quoted and heredoc situations we can use Ragel's fhold statement to "backtrack" to before the character we consumed, which does the trick. For bare templates this is insufficient because there _is_ no following character, and so the scanner detects this as an error. Rather than adding even more complexity to the state machine, we instead treat invalid bytes at the top level of a bare template as a special case, returning them as a TokenStringLit instead of a TokenInvalid. This then gives the parser what it needs.

The fhold approach causes some odd behavior where an escaped template introducer character causes a token split, so two tokens are emitted instead of one. This is weird but harmless, since we'll ultimately just concatenate all of these strings together anyway, and so we allow it, again to avoid making the scanner more complex when it's easy enough to handle this in the parser where we have more context.
This commit is contained in:
parent
d66303f45b
commit
92456935b8
@ -229,6 +229,30 @@ trim`,
|
||||
cty.StringVal("\\uu1234"),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`$`,
|
||||
nil,
|
||||
cty.StringVal("$"),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`$$`,
|
||||
nil,
|
||||
cty.StringVal("$$"),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`%`,
|
||||
nil,
|
||||
cty.StringVal("%"),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`%%`,
|
||||
nil,
|
||||
cty.StringVal("%%"),
|
||||
0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -182,6 +182,12 @@ func TestExpressionParseAndValue(t *testing.T) {
|
||||
cty.StringVal("hello"),
|
||||
0,
|
||||
},
|
||||
{
|
||||
"\"hello `backtick` world\"",
|
||||
nil,
|
||||
cty.StringVal("hello `backtick` world"),
|
||||
0,
|
||||
},
|
||||
{
|
||||
`"hello\nworld"`,
|
||||
nil,
|
||||
|
@ -715,10 +715,21 @@ block "valid" {}
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("hello $$"),
|
||||
Val: cty.StringVal("hello $"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
},
|
||||
// This parses oddly due to how the scanner
|
||||
// handles escaping of the $ sequence, but it's
|
||||
// functionally equivalent to a single literal.
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("$"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
|
||||
},
|
||||
},
|
||||
@ -755,6 +766,56 @@ block "valid" {}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"hello $\"\n",
|
||||
0, // no diagnostics: a trailing "$" is a literal, not an unterminated template interpolation sequence
|
||||
&Body{
|
||||
Attributes: Attributes{
|
||||
"a": {
|
||||
Name: "a",
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("hello $"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 5, Byte: 4},
|
||||
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
|
||||
},
|
||||
NameRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 2, Byte: 1},
|
||||
},
|
||||
EqualsRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 3, Byte: 2},
|
||||
End: hcl.Pos{Line: 1, Column: 4, Byte: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
Blocks: Blocks{},
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 14},
|
||||
},
|
||||
EndRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 2, Column: 1, Byte: 14},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 14},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"hello %%\"\n",
|
||||
0,
|
||||
@ -765,10 +826,21 @@ block "valid" {}
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("hello %%"),
|
||||
Val: cty.StringVal("hello %"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
},
|
||||
// This parses oddly due to how the scanner
|
||||
// handles escaping of the % sequence, but it's
|
||||
// functionally equivalent to a single literal.
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("%"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
|
||||
},
|
||||
},
|
||||
@ -805,6 +877,56 @@ block "valid" {}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"hello %\"\n",
|
||||
0, // no diagnostics: a trailing "%" is a literal, not an unterminated template control sequence
|
||||
&Body{
|
||||
Attributes: Attributes{
|
||||
"a": {
|
||||
Name: "a",
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("hello %"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 5, Byte: 4},
|
||||
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
|
||||
},
|
||||
NameRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 2, Byte: 1},
|
||||
},
|
||||
EqualsRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 3, Byte: 2},
|
||||
End: hcl.Pos{Line: 1, Column: 4, Byte: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
Blocks: Blocks{},
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 14},
|
||||
},
|
||||
EndRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 2, Column: 1, Byte: 14},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 14},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"hello!\"\n",
|
||||
0,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -197,14 +197,14 @@ func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []To
|
||||
EndStringTmpl = '"';
|
||||
StringLiteralChars = (AnyUTF8 - ("\r"|"\n"));
|
||||
TemplateStringLiteral = (
|
||||
('$' ^'{') |
|
||||
('%' ^'{') |
|
||||
('$' ^'{' %{ fhold; }) |
|
||||
('%' ^'{' %{ fhold; }) |
|
||||
('\\' StringLiteralChars) |
|
||||
(StringLiteralChars - ("$" | '%' | '"'))
|
||||
)+;
|
||||
HeredocStringLiteral = (
|
||||
('$' ^'{') |
|
||||
('%' ^'{') |
|
||||
('$' ^'{' %{ fhold; }) |
|
||||
('%' ^'{' %{ fhold; }) |
|
||||
(StringLiteralChars - ("$" | '%'))
|
||||
)*;
|
||||
BareStringLiteral = (
|
||||
@ -337,7 +337,17 @@ func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []To
|
||||
// encountered something that the scanner can't match, which we'll
|
||||
// deal with as an invalid.
|
||||
if cs < hcltok_first_final {
|
||||
f.emitToken(TokenInvalid, p, len(data))
|
||||
if mode == scanTemplate && len(stack) == 0 {
|
||||
// If we're scanning a bare template then any straggling
|
||||
// top-level stuff is actually literal string, rather than
|
||||
// invalid. This handles the case where the template ends
|
||||
// with a single "$" or "%", which trips us up because we
|
||||
// want to see another character to decide if it's a sequence
|
||||
// or an escape.
|
||||
f.emitToken(TokenStringLit, ts, len(data))
|
||||
} else {
|
||||
f.emitToken(TokenInvalid, ts, len(data))
|
||||
}
|
||||
}
|
||||
|
||||
// We always emit a synthetic EOF token at the end, since it gives the
|
||||
|
@ -372,6 +372,178 @@ func TestScanTokens_normal(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
`"hello $$"`,
|
||||
[]Token{
|
||||
{
|
||||
Type: TokenOQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 0, Line: 1, Column: 1},
|
||||
End: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenQuotedLit,
|
||||
Bytes: []byte(`hello $`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
End: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
},
|
||||
},
|
||||
// This one scans a little oddly because of how the scanner
|
||||
// handles the escaping of the dollar sign, but it's still
|
||||
// good enough for the parser since it'll just concatenate
|
||||
// these two string literals together anyway.
|
||||
{
|
||||
Type: TokenQuotedLit,
|
||||
Bytes: []byte(`$`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
End: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenCQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
End: hcl.Pos{Byte: 10, Line: 1, Column: 11},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenEOF,
|
||||
Bytes: []byte{},
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 10, Line: 1, Column: 11},
|
||||
End: hcl.Pos{Byte: 10, Line: 1, Column: 11},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
`"hello %%"`,
|
||||
[]Token{
|
||||
{
|
||||
Type: TokenOQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 0, Line: 1, Column: 1},
|
||||
End: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenQuotedLit,
|
||||
Bytes: []byte(`hello %`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
End: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
},
|
||||
},
|
||||
// This one scans a little oddly because of how the scanner
|
||||
// handles the escaping of the percent sign, but it's still
|
||||
// good enough for the parser since it'll just concatenate
|
||||
// these two string literals together anyway.
|
||||
{
|
||||
Type: TokenQuotedLit,
|
||||
Bytes: []byte(`%`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
End: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenCQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
End: hcl.Pos{Byte: 10, Line: 1, Column: 11},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenEOF,
|
||||
Bytes: []byte{},
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 10, Line: 1, Column: 11},
|
||||
End: hcl.Pos{Byte: 10, Line: 1, Column: 11},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
`"hello $"`,
|
||||
[]Token{
|
||||
{
|
||||
Type: TokenOQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 0, Line: 1, Column: 1},
|
||||
End: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenQuotedLit,
|
||||
Bytes: []byte(`hello $`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
End: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenCQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
End: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenEOF,
|
||||
Bytes: []byte{},
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
End: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
`"hello %"`,
|
||||
[]Token{
|
||||
{
|
||||
Type: TokenOQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 0, Line: 1, Column: 1},
|
||||
End: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenQuotedLit,
|
||||
Bytes: []byte(`hello %`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 1, Line: 1, Column: 2},
|
||||
End: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenCQuote,
|
||||
Bytes: []byte(`"`),
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 8, Line: 1, Column: 9},
|
||||
End: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TokenEOF,
|
||||
Bytes: []byte{},
|
||||
Range: hcl.Range{
|
||||
Start: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
End: hcl.Pos{Byte: 9, Line: 1, Column: 10},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
// Templates with interpolations and control sequences
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user