zclsyntax: decode both quoted and unquoted string literals
This rewrite of decodeQuotedLit, now called decodeStringLit, handles both cases with a single function. It also now correctly handles a double-$ or double-! that is not immediately followed by a { symbol; such a sequence is not an escape and must be treated literally.
This commit is contained in:
parent
f8bdda5ab9
commit
d003da232e
@ -279,7 +279,7 @@ Token:
|
|||||||
break Token
|
break Token
|
||||||
|
|
||||||
case TokenQuotedLit:
|
case TokenQuotedLit:
|
||||||
s, sDiags := p.decodeQuotedLit(tok)
|
s, sDiags := p.decodeStringLit(tok)
|
||||||
diags = append(diags, sDiags...)
|
diags = append(diags, sDiags...)
|
||||||
ret.WriteString(s)
|
ret.WriteString(s)
|
||||||
|
|
||||||
@ -330,45 +330,93 @@ Token:
|
|||||||
return ret.String(), zcl.RangeBetween(oQuote.Range, cQuote.Range), diags
|
return ret.String(), zcl.RangeBetween(oQuote.Range, cQuote.Range), diags
|
||||||
}
|
}
|
||||||
|
|
||||||
// decodeQuotedLit processes the given TokenQuotedLit token as if it were
|
// decodeStringLit processes the given token, which must be either a
|
||||||
// a string literal appearing in quotes, returning the string resulting from
|
// TokenQuotedLit or a TokenStringLit, returning the string resulting from
|
||||||
// resolving any escape sequences.
|
// resolving any escape sequences.
|
||||||
//
|
//
|
||||||
// If any error diagnostics are returned, the returned string may be incomplete
|
// If any error diagnostics are returned, the returned string may be incomplete
|
||||||
// or otherwise invalid.
|
// or otherwise invalid.
|
||||||
func (p *parser) decodeQuotedLit(tok Token) (string, zcl.Diagnostics) {
|
func (p *parser) decodeStringLit(tok Token) (string, zcl.Diagnostics) {
|
||||||
if tok.Type != TokenQuotedLit {
|
var quoted bool
|
||||||
panic("decodeQuotedLit can only be used with TokenQuotedLit tokens")
|
switch tok.Type {
|
||||||
|
case TokenQuotedLit:
|
||||||
|
quoted = true
|
||||||
|
case TokenStringLit:
|
||||||
|
quoted = false
|
||||||
|
default:
|
||||||
|
panic("decodeQuotedLit can only be used with TokenStringLit and TokenQuotedLit tokens")
|
||||||
}
|
}
|
||||||
var diags zcl.Diagnostics
|
var diags zcl.Diagnostics
|
||||||
|
|
||||||
ret := make([]byte, 0, len(tok.Bytes))
|
ret := make([]byte, 0, len(tok.Bytes))
|
||||||
|
var esc []byte
|
||||||
|
|
||||||
sc := bufio.NewScanner(bytes.NewReader(tok.Bytes))
|
sc := bufio.NewScanner(bytes.NewReader(tok.Bytes))
|
||||||
sc.Split(textseg.ScanGraphemeClusters)
|
sc.Split(textseg.ScanGraphemeClusters)
|
||||||
|
|
||||||
escaping := rune(0)
|
|
||||||
pos := tok.Range.Start
|
pos := tok.Range.Start
|
||||||
|
newPos := pos
|
||||||
|
Character:
|
||||||
for sc.Scan() {
|
for sc.Scan() {
|
||||||
switch escaping {
|
pos = newPos
|
||||||
case '\\':
|
ch := sc.Bytes()
|
||||||
escaping = 0
|
|
||||||
ty := sc.Text()
|
|
||||||
switch ty {
|
|
||||||
case "n":
|
|
||||||
ret = append(ret, 10)
|
|
||||||
case "r":
|
|
||||||
ret = append(ret, 13)
|
|
||||||
case "t":
|
|
||||||
ret = append(ret, 9)
|
|
||||||
|
|
||||||
// TODO: numeric character escapes with \uXXXX
|
// Adjust position based on our new character.
|
||||||
|
// \r\n is considered to be a single character in text segmentation,
|
||||||
|
if (len(ch) == 1 && ch[0] == '\n') || (len(ch) == 2 && ch[1] == '\n') {
|
||||||
|
newPos.Line++
|
||||||
|
newPos.Column = 0
|
||||||
|
} else {
|
||||||
|
newPos.Column++
|
||||||
|
}
|
||||||
|
newPos.Byte += len(ch)
|
||||||
|
|
||||||
|
if len(esc) > 0 {
|
||||||
|
switch esc[0] {
|
||||||
|
case '\\':
|
||||||
|
if len(ch) == 1 {
|
||||||
|
switch ch[0] {
|
||||||
|
|
||||||
|
// TODO: numeric character escapes with \uXXXX
|
||||||
|
|
||||||
|
case 'n':
|
||||||
|
ret = append(ret, '\n')
|
||||||
|
esc = esc[:0]
|
||||||
|
continue Character
|
||||||
|
case 'r':
|
||||||
|
ret = append(ret, '\r')
|
||||||
|
esc = esc[:0]
|
||||||
|
continue Character
|
||||||
|
case 't':
|
||||||
|
ret = append(ret, '\t')
|
||||||
|
esc = esc[:0]
|
||||||
|
continue Character
|
||||||
|
case '"':
|
||||||
|
ret = append(ret, '"')
|
||||||
|
esc = esc[:0]
|
||||||
|
continue Character
|
||||||
|
case '\\':
|
||||||
|
ret = append(ret, '\\')
|
||||||
|
esc = esc[:0]
|
||||||
|
continue Character
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var detail string
|
||||||
|
switch {
|
||||||
|
case len(ch) == 1 && (ch[0] == '$' || ch[0] == '!'):
|
||||||
|
detail = fmt.Sprintf(
|
||||||
|
"The characters \"\\%s\" do not form a recognized escape sequence. To escape a \"%s{\" template sequence, use \"%s%s{\".",
|
||||||
|
ch, ch, ch, ch,
|
||||||
|
)
|
||||||
|
default:
|
||||||
|
detail = fmt.Sprintf("The characters \"\\%s\" do not form a recognized escape sequence.", ch)
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
|
||||||
diags = append(diags, &zcl.Diagnostic{
|
diags = append(diags, &zcl.Diagnostic{
|
||||||
Severity: zcl.DiagError,
|
Severity: zcl.DiagError,
|
||||||
Summary: "Invalid escape sequence",
|
Summary: "Invalid escape sequence",
|
||||||
Detail: fmt.Sprintf("The sequence \"\\%s\" is not a recognized escape sequence.", ty),
|
Detail: detail,
|
||||||
Subject: &zcl.Range{
|
Subject: &zcl.Range{
|
||||||
Filename: tok.Range.Filename,
|
Filename: tok.Range.Filename,
|
||||||
Start: zcl.Pos{
|
Start: zcl.Pos{
|
||||||
@ -379,35 +427,61 @@ func (p *parser) decodeQuotedLit(tok Token) (string, zcl.Diagnostics) {
|
|||||||
End: zcl.Pos{
|
End: zcl.Pos{
|
||||||
Line: pos.Line,
|
Line: pos.Line,
|
||||||
Column: pos.Column + 1, // safe because we know the previous character must be a backslash
|
Column: pos.Column + 1, // safe because we know the previous character must be a backslash
|
||||||
Byte: pos.Byte + len(ty),
|
Byte: pos.Byte + len(ch),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
ret = append(ret, sc.Bytes()...)
|
ret = append(ret, ch...)
|
||||||
}
|
esc = esc[:0]
|
||||||
case '$', '!':
|
continue Character
|
||||||
bytes := sc.Bytes()
|
|
||||||
if len(bytes) != 1 || bytes[0] == byte(escaping) {
|
|
||||||
ret = append(ret, byte(escaping))
|
|
||||||
}
|
|
||||||
ret = append(ret, bytes...)
|
|
||||||
default:
|
|
||||||
switch sc.Text() {
|
|
||||||
case "\\":
|
|
||||||
escaping = '\\'
|
|
||||||
case "$":
|
|
||||||
escaping = '$'
|
|
||||||
case "!":
|
|
||||||
escaping = '!'
|
|
||||||
default:
|
|
||||||
ret = append(ret, sc.Bytes()...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Literal newlines cannot appear in quoted literals, so it's safe
|
case '$', '!':
|
||||||
// to just increment Column and Byte in our position.
|
switch len(esc) {
|
||||||
pos.Column++
|
case 1:
|
||||||
pos.Byte += len(sc.Bytes())
|
if len(ch) == 1 && ch[0] == esc[0] {
|
||||||
|
esc = append(esc, ch[0])
|
||||||
|
continue Character
|
||||||
|
}
|
||||||
|
|
||||||
|
// Any other character means this wasn't an escape sequence
|
||||||
|
// after all.
|
||||||
|
ret = append(ret, esc...)
|
||||||
|
ret = append(ret, ch...)
|
||||||
|
esc = esc[:0]
|
||||||
|
case 2:
|
||||||
|
if len(ch) == 1 && ch[0] == '{' {
|
||||||
|
// successful escape sequence
|
||||||
|
ret = append(ret, esc[0])
|
||||||
|
} else {
|
||||||
|
// not an escape sequence, so just output literal
|
||||||
|
ret = append(ret, esc...)
|
||||||
|
}
|
||||||
|
ret = append(ret, ch...)
|
||||||
|
esc = esc[:0]
|
||||||
|
default:
|
||||||
|
// should never happen
|
||||||
|
panic("have invalid escape sequence >2 characters")
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if len(ch) == 1 {
|
||||||
|
switch ch[0] {
|
||||||
|
case '\\':
|
||||||
|
if quoted { // ignore backslashes in unquoted mode
|
||||||
|
esc = append(esc, '\\')
|
||||||
|
continue Character
|
||||||
|
}
|
||||||
|
case '$':
|
||||||
|
esc = append(esc, '$')
|
||||||
|
continue Character
|
||||||
|
case '!':
|
||||||
|
esc = append(esc, '!')
|
||||||
|
continue Character
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret = append(ret, ch...)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return string(ret), diags
|
return string(ret), diags
|
||||||
|
@ -221,7 +221,7 @@ block "valid" {}
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
`block "f\o" {}`,
|
`block "f\o" {}`,
|
||||||
1, // \o is not a valid escape sequence
|
1, // "\o" is not a valid escape sequence
|
||||||
&Body{
|
&Body{
|
||||||
Attributes: Attributes{},
|
Attributes: Attributes{},
|
||||||
Blocks: Blocks{
|
Blocks: Blocks{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user