hclsyntax: \uxxxx and \Uxxxxxxxx escape sequences in string literals
These allow the inclusion of arbitrary Unicode code points (always encoded as UTF-8) using a hex representation. \u expects exactly four hex digits and can thus represent only characters in the Basic Multilingual Plane. \U expects exactly eight hex digits and can thus represent all Unicode characters, at the cost of being extra verbose. Since our parser properly accounts for Unicode characters (including combining sequences), it's recommended to include them literally (UTF-8 encoded) in source code, but these sequences are useful for explicitly representing non-printable characters that could otherwise appear invisible in source code, such as zero-width modifier characters. This fixes #6.
This commit is contained in:
parent
f0bf2b15ae
commit
a1c55afeca
@ -4,6 +4,8 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"github.com/hashicorp/hcl2/hcl"
|
||||
@ -1501,11 +1503,87 @@ Character:
|
||||
if len(esc) > 0 {
|
||||
switch esc[0] {
|
||||
case '\\':
|
||||
|
||||
if len(esc) >= 2 {
|
||||
switch esc[1] {
|
||||
case 'u', 'U':
|
||||
// Our new character must be an ASCII hex digit
|
||||
_, err := strconv.ParseInt(string(ch), 16, 0)
|
||||
if err != nil {
|
||||
var detail string
|
||||
switch esc[1] {
|
||||
case 'u':
|
||||
detail = "Escape sequence \\u must be followed by exactly four hexidecimal digits."
|
||||
case 'U':
|
||||
detail = "Escape sequence \\U must be followed by exactly eight hexidecimal digits."
|
||||
}
|
||||
diags = append(diags, &hcl.Diagnostic{
|
||||
Severity: hcl.DiagError,
|
||||
Summary: "Invalid escape sequence",
|
||||
Detail: detail,
|
||||
Subject: &hcl.Range{
|
||||
Filename: tok.Range.Filename,
|
||||
Start: hcl.Pos{
|
||||
Line: pos.Line,
|
||||
Column: pos.Column,
|
||||
Byte: pos.Byte,
|
||||
},
|
||||
End: hcl.Pos{
|
||||
Line: pos.Line,
|
||||
Column: pos.Column + 1,
|
||||
Byte: pos.Byte + len(ch),
|
||||
},
|
||||
},
|
||||
})
|
||||
ret = append(ret, esc...)
|
||||
ret = append(ret, ch...)
|
||||
esc = esc[:0]
|
||||
continue Character
|
||||
}
|
||||
|
||||
esc = append(esc, ch...)
|
||||
|
||||
var complete bool
|
||||
switch esc[1] {
|
||||
case 'u':
|
||||
complete = (len(esc) == 6) // four digits plus our \u introducer
|
||||
case 'U':
|
||||
complete = (len(esc) == 10) // eight digits plus our \U introducer
|
||||
}
|
||||
if !complete {
|
||||
// Not enough digits yet; keep accumulating on the next character.
|
||||
continue Character
|
||||
}
|
||||
|
||||
digits := string(esc[2:])
|
||||
valInt, err := strconv.ParseInt(digits, 16, 32)
|
||||
if err != nil {
|
||||
// Should never happen because we validated our digits
|
||||
// as they arrived, above.
|
||||
panic(err)
|
||||
}
|
||||
r := rune(valInt)
|
||||
rl := utf8.RuneLen(r)
|
||||
|
||||
// Make room in our ret buffer for the extra characters
|
||||
for i := 0; i < rl; i++ {
|
||||
ret = append(ret, 0)
|
||||
}
|
||||
|
||||
// Fill those extra characters with the canonical UTF-8
|
||||
// representation of our rune.
|
||||
utf8.EncodeRune(ret[len(ret)-rl:], r)
|
||||
|
||||
// ...and now finally we're finished escaping!
|
||||
esc = esc[:0]
|
||||
|
||||
continue Character
|
||||
}
|
||||
}
|
||||
|
||||
if len(ch) == 1 {
|
||||
switch ch[0] {
|
||||
|
||||
// TODO: numeric character escapes with \uXXXX
|
||||
|
||||
case 'n':
|
||||
ret = append(ret, '\n')
|
||||
esc = esc[:0]
|
||||
@ -1526,6 +1604,11 @@ Character:
|
||||
ret = append(ret, '\\')
|
||||
esc = esc[:0]
|
||||
continue Character
|
||||
case 'u', 'U':
|
||||
// For these, we'll continue working on them until
|
||||
// we accumulate the expected number of digits.
|
||||
esc = append(esc, ch...)
|
||||
continue Character
|
||||
}
|
||||
}
|
||||
|
||||
@ -1622,7 +1705,7 @@ Character:
|
||||
diags = append(diags, &hcl.Diagnostic{
|
||||
Severity: hcl.DiagError,
|
||||
Summary: "Invalid escape sequence",
|
||||
Detail: fmt.Sprintf("The characters %q do not form a recognized escape sequence.", esc),
|
||||
Detail: fmt.Sprintf("The characters %q do not form a complete escape sequence.", esc),
|
||||
Subject: &hcl.Range{
|
||||
Filename: tok.Range.Filename,
|
||||
Start: hcl.Pos{
|
||||
|
@ -715,6 +715,259 @@ block "valid" {}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"\\u2022\"\n",
|
||||
0,
|
||||
&Body{
|
||||
Attributes: Attributes{
|
||||
"a": {
|
||||
Name: "a",
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("\u2022"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 5, Byte: 4},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
NameRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 2, Byte: 1},
|
||||
},
|
||||
EqualsRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 3, Byte: 2},
|
||||
End: hcl.Pos{Line: 1, Column: 4, Byte: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
Blocks: Blocks{},
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
},
|
||||
EndRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"\\U0001d11e\"\n",
|
||||
0,
|
||||
&Body{
|
||||
Attributes: Attributes{
|
||||
"a": {
|
||||
Name: "a",
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("\U0001d11e"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 16, Byte: 15},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 5, Byte: 4},
|
||||
End: hcl.Pos{Line: 1, Column: 17, Byte: 16},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 17, Byte: 16},
|
||||
},
|
||||
NameRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 2, Byte: 1},
|
||||
},
|
||||
EqualsRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 3, Byte: 2},
|
||||
End: hcl.Pos{Line: 1, Column: 4, Byte: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
Blocks: Blocks{},
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 17},
|
||||
},
|
||||
EndRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 2, Column: 1, Byte: 17},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 17},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"\\u0001d11e\"\n",
|
||||
0, // This is valid, but probably not what the user intended :(
|
||||
&Body{
|
||||
Attributes: Attributes{
|
||||
"a": {
|
||||
Name: "a",
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
// Only the first four digits were used for the
|
||||
// escape sequence, so the remaining four just
|
||||
// get echoed out literally.
|
||||
Val: cty.StringVal("\u0001d11e"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 16, Byte: 15},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 5, Byte: 4},
|
||||
End: hcl.Pos{Line: 1, Column: 17, Byte: 16},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 17, Byte: 16},
|
||||
},
|
||||
NameRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 2, Byte: 1},
|
||||
},
|
||||
EqualsRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 3, Byte: 2},
|
||||
End: hcl.Pos{Line: 1, Column: 4, Byte: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
Blocks: Blocks{},
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 17},
|
||||
},
|
||||
EndRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 2, Column: 1, Byte: 17},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 17},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"\\U2022\"\n",
|
||||
1, // Invalid escape sequence, since we need eight hex digits for \U
|
||||
&Body{
|
||||
Attributes: Attributes{
|
||||
"a": {
|
||||
Name: "a",
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("\\U2022"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 5, Byte: 4},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
NameRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 2, Byte: 1},
|
||||
},
|
||||
EqualsRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 3, Byte: 2},
|
||||
End: hcl.Pos{Line: 1, Column: 4, Byte: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
Blocks: Blocks{},
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
},
|
||||
EndRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = \"\\u20m2\"\n",
|
||||
1, // Invalid escape sequence
|
||||
&Body{
|
||||
Attributes: Attributes{
|
||||
"a": {
|
||||
Name: "a",
|
||||
Expr: &TemplateExpr{
|
||||
Parts: []Expression{
|
||||
&LiteralValueExpr{
|
||||
Val: cty.StringVal("\\u20m2"),
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
|
||||
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 5, Byte: 4},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
},
|
||||
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
|
||||
},
|
||||
NameRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 1, Column: 2, Byte: 1},
|
||||
},
|
||||
EqualsRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 3, Byte: 2},
|
||||
End: hcl.Pos{Line: 1, Column: 4, Byte: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
Blocks: Blocks{},
|
||||
SrcRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 1, Column: 1, Byte: 0},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
},
|
||||
EndRange: hcl.Range{
|
||||
Start: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
End: hcl.Pos{Line: 2, Column: 1, Byte: 13},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"a = foo.bar\n",
|
||||
0,
|
||||
|
Loading…
Reference in New Issue
Block a user