zclsyntax: heredoc support in the scanner

This commit is contained in:
Martin Atkins 2017-05-29 08:55:53 -07:00
parent 4a939a2b46
commit 2551856d22
4 changed files with 3044 additions and 1372 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,8 @@
package zclsyntax
import (
"bytes"
"github.com/zclconf/go-zcl/zcl"
)
@ -46,6 +48,7 @@ func scanTokens(data []byte, filename string, start zcl.Pos, mode scanMode) []To
Newline = '\r' ? '\n';
BeginStringTmpl = '"';
BeginHeredocTmpl = '<<' ('-')? Ident Newline;
# Tabs are not valid, but we accept them in the scanner and mark them
# as tokens so that we can produce diagnostics advising the user to
@ -64,10 +67,62 @@ func scanTokens(data []byte, filename string, start zcl.Pos, mode scanMode) []To
fret;
}
action beginHeredocTemplate {
// Runs when the main scanner matches a heredoc introducer
// (BeginHeredocTmpl): emits the opening token, records the closing
// marker for this heredoc, and switches to the heredocTemplate scanner.
token(TokenOHeredoc);
// The matched token is the whole heredoc introducer, like
// <<EOT or <<-EOT, followed by a newline. We want to extract
// just the "EOT" portion that we'll use as the closing marker.
// ts+2 skips the leading "<<" and te-1 drops the final "\n".
// The result is a sub-slice of data, not a copy.
marker := data[ts+2:te-1]
// Drop the optional "-" of the <<- introducer form.
if marker[0] == '-' {
marker = marker[1:]
}
// Newline may be "\r\n", in which case a "\r" is still attached.
if marker[len(marker)-1] == '\r' {
marker = marker[:len(marker)-1]
}
// Push onto the stack of open heredocs. The body starts on a fresh
// line, so the very next literal may already be the closing marker.
heredocs = append(heredocs, heredocInProgress{
Marker: marker,
StartOfLine: true,
})
fcall heredocTemplate;
}
action heredocLiteralEOL {
// This action is called specifically when a heredoc literal
// ends with a newline character.
// This might actually be our end marker.
topdoc := &heredocs[len(heredocs)-1]
// Only a literal that began at the start of a line can be the closing
// marker; text that follows an interpolation on the same line cannot.
if topdoc.StartOfLine {
// TrimSpace removes the trailing newline as well as any leading or
// trailing whitespace, so an indented marker line still matches.
maybeMarker := bytes.TrimSpace(data[ts:te])
if bytes.Equal(maybeMarker, topdoc.Marker) {
// The whole line (including its newline) becomes the closing
// token; pop this heredoc and return to the scanner that
// entered it.
token(TokenCHeredoc);
heredocs = heredocs[:len(heredocs)-1]
fret;
}
}
// Ordinary line of heredoc content. Having just consumed a newline,
// the next literal starts at the beginning of a line.
topdoc.StartOfLine = true;
token(TokenStringLit);
}
action heredocLiteralMidline {
// This action is called when a heredoc literal _doesn't_ end
// with a newline character, e.g. because we're about to enter
// an interpolation sequence.
// Whatever is scanned next continues the current line, so it must
// not be considered as a candidate closing marker.
heredocs[len(heredocs)-1].StartOfLine = false;
token(TokenStringLit);
}
action beginTemplateInterp {
// Runs when a template scanner matches the start of an interpolation
// sequence: emits the token, records the brace level at which the
// sequence will close, and hands off to the main scanner.
token(TokenTemplateInterp);
braces++;
// retBraces is the stack of brace levels that cause a return (fret)
// back to the enclosing template scanner.
retBraces = append(retBraces, braces);
// If a heredoc is open, any literal text that follows this sequence
// continues the current line, so it must not be treated as a
// candidate closing marker.
if len(heredocs) > 0 {
heredocs[len(heredocs)-1].StartOfLine = false;
}
fcall main;
}
@ -75,6 +130,9 @@ func scanTokens(data []byte, filename string, start zcl.Pos, mode scanMode) []To
token(TokenTemplateControl);
braces++;
retBraces = append(retBraces, braces);
if len(heredocs) > 0 {
heredocs[len(heredocs)-1].StartOfLine = false;
}
fcall main;
}
@ -119,6 +177,11 @@ func scanTokens(data []byte, filename string, start zcl.Pos, mode scanMode) []To
('\\' StringLiteralChars) |
(StringLiteralChars - ("$" | "!" | '"'))
)+;
# A run of literal text inside a heredoc body: zero or more of
#   - '$' followed by any character other than '{',
#   - '!' followed by any character other than '{',
#   - any of StringLiteralChars other than '$' and '!'.
# Unlike the quoted-string literal above, there is no backslash-escape
# alternative and the double-quote character is not excluded.
# NOTE(review): this presumably stops at a newline because
# StringLiteralChars excludes it; confirm against that definition.
HeredocStringLiteral = (
('$' ^'{') |
('!' ^'{') |
(StringLiteralChars - ("$" | "!"))
)*;
stringTemplate := |*
TemplateInterp => beginTemplateInterp;
@ -129,6 +192,14 @@ func scanTokens(data []byte, filename string, start zcl.Pos, mode scanMode) []To
BrokenUTF8 => { token(TokenBadUTF8); };
*|;
# Scanner used for the body of a heredoc, entered via fcall from
# beginHeredocTemplate. Literal text is split into two cases: a literal
# that ends with a newline (which may turn out to be the closing marker)
# and one that stops mid-line, e.g. just before an interpolation or
# control sequence.
heredocTemplate := |*
TemplateInterp => beginTemplateInterp;
TemplateControl => beginTemplateControl;
HeredocStringLiteral Newline => heredocLiteralEOL;
HeredocStringLiteral => heredocLiteralMidline;
BrokenUTF8 => { token(TokenBadUTF8); };
*|;
main := |*
Spaces => {};
NumberLit => { token(TokenNumberLit) };
@ -147,6 +218,7 @@ func scanTokens(data []byte, filename string, start zcl.Pos, mode scanMode) []To
"~}" => closeTemplateSeqEatWhitespace;
BeginStringTmpl => beginStringTemplate;
BeginHeredocTmpl => beginHeredocTemplate;
Tabs => { token(TokenTabs) };
AnyUTF8 => { token(TokenInvalid) };
@ -179,6 +251,7 @@ func scanTokens(data []byte, filename string, start zcl.Pos, mode scanMode) []To
braces := 0
var retBraces []int // stack of brace levels that cause us to use fret
var heredocs []heredocInProgress // stack of heredocs we're currently processing
%%{
prepush {

View File

@ -689,6 +689,267 @@ func TestScanTokens(t *testing.T) {
},
},
// Heredoc Templates
{
`<<EOT
hello world
EOT
`,
[]Token{
{
Type: TokenOHeredoc,
Bytes: []byte("<<EOT\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 0, Line: 1, Column: 1},
End: zcl.Pos{Byte: 6, Line: 2, Column: 1},
},
},
{
Type: TokenStringLit,
Bytes: []byte("hello world\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 6, Line: 2, Column: 1},
End: zcl.Pos{Byte: 18, Line: 3, Column: 1},
},
},
{
Type: TokenCHeredoc,
Bytes: []byte("EOT\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 18, Line: 3, Column: 1},
End: zcl.Pos{Byte: 22, Line: 4, Column: 1},
},
},
{
Type: TokenEOF,
Bytes: []byte{},
Range: zcl.Range{
Start: zcl.Pos{Byte: 22, Line: 4, Column: 1},
End: zcl.Pos{Byte: 22, Line: 4, Column: 1},
},
},
},
},
{
`<<EOT
hello ${name}
EOT
`,
[]Token{
{
Type: TokenOHeredoc,
Bytes: []byte("<<EOT\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 0, Line: 1, Column: 1},
End: zcl.Pos{Byte: 6, Line: 2, Column: 1},
},
},
{
Type: TokenStringLit,
Bytes: []byte("hello "),
Range: zcl.Range{
Start: zcl.Pos{Byte: 6, Line: 2, Column: 1},
End: zcl.Pos{Byte: 12, Line: 2, Column: 7},
},
},
{
Type: TokenTemplateInterp,
Bytes: []byte("${"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 12, Line: 2, Column: 7},
End: zcl.Pos{Byte: 14, Line: 2, Column: 9},
},
},
{
Type: TokenIdent,
Bytes: []byte("name"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 14, Line: 2, Column: 9},
End: zcl.Pos{Byte: 18, Line: 2, Column: 13},
},
},
{
Type: TokenTemplateSeqEnd,
Bytes: []byte("}"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 18, Line: 2, Column: 13},
End: zcl.Pos{Byte: 19, Line: 2, Column: 14},
},
},
{
Type: TokenStringLit,
Bytes: []byte("\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 19, Line: 2, Column: 14},
End: zcl.Pos{Byte: 20, Line: 3, Column: 1},
},
},
{
Type: TokenCHeredoc,
Bytes: []byte("EOT\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 20, Line: 3, Column: 1},
End: zcl.Pos{Byte: 24, Line: 4, Column: 1},
},
},
{
Type: TokenEOF,
Bytes: []byte{},
Range: zcl.Range{
Start: zcl.Pos{Byte: 24, Line: 4, Column: 1},
End: zcl.Pos{Byte: 24, Line: 4, Column: 1},
},
},
},
},
{
`<<EOT
${name}EOT
EOT
`,
[]Token{
{
Type: TokenOHeredoc,
Bytes: []byte("<<EOT\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 0, Line: 1, Column: 1},
End: zcl.Pos{Byte: 6, Line: 2, Column: 1},
},
},
{
Type: TokenTemplateInterp,
Bytes: []byte("${"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 6, Line: 2, Column: 1},
End: zcl.Pos{Byte: 8, Line: 2, Column: 3},
},
},
{
Type: TokenIdent,
Bytes: []byte("name"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 8, Line: 2, Column: 3},
End: zcl.Pos{Byte: 12, Line: 2, Column: 7},
},
},
{
Type: TokenTemplateSeqEnd,
Bytes: []byte("}"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 12, Line: 2, Column: 7},
End: zcl.Pos{Byte: 13, Line: 2, Column: 8},
},
},
{
Type: TokenStringLit,
Bytes: []byte("EOT\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 13, Line: 2, Column: 8},
End: zcl.Pos{Byte: 17, Line: 3, Column: 1},
},
},
{
Type: TokenCHeredoc,
Bytes: []byte("EOT\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 17, Line: 3, Column: 1},
End: zcl.Pos{Byte: 21, Line: 4, Column: 1},
},
},
{
Type: TokenEOF,
Bytes: []byte{},
Range: zcl.Range{
Start: zcl.Pos{Byte: 21, Line: 4, Column: 1},
End: zcl.Pos{Byte: 21, Line: 4, Column: 1},
},
},
},
},
{
`<<EOF
${<<-EOF
hello
EOF
}
EOF
`,
[]Token{
{
Type: TokenOHeredoc,
Bytes: []byte("<<EOF\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 0, Line: 1, Column: 1},
End: zcl.Pos{Byte: 6, Line: 2, Column: 1},
},
},
{
Type: TokenTemplateInterp,
Bytes: []byte("${"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 6, Line: 2, Column: 1},
End: zcl.Pos{Byte: 8, Line: 2, Column: 3},
},
},
{
Type: TokenOHeredoc,
Bytes: []byte("<<-EOF\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 8, Line: 2, Column: 3},
End: zcl.Pos{Byte: 15, Line: 3, Column: 1},
},
},
{
Type: TokenStringLit,
Bytes: []byte("hello\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 15, Line: 3, Column: 1},
End: zcl.Pos{Byte: 21, Line: 4, Column: 1},
},
},
{
Type: TokenCHeredoc,
Bytes: []byte("EOF\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 21, Line: 4, Column: 1},
End: zcl.Pos{Byte: 25, Line: 5, Column: 1},
},
},
{
Type: TokenTemplateSeqEnd,
Bytes: []byte("}"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 25, Line: 5, Column: 1},
End: zcl.Pos{Byte: 26, Line: 5, Column: 2},
},
},
{
Type: TokenStringLit,
Bytes: []byte("\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 26, Line: 5, Column: 2},
End: zcl.Pos{Byte: 27, Line: 6, Column: 1},
},
},
{
Type: TokenCHeredoc,
Bytes: []byte("EOF\n"),
Range: zcl.Range{
Start: zcl.Pos{Byte: 27, Line: 6, Column: 1},
End: zcl.Pos{Byte: 31, Line: 7, Column: 1},
},
},
{
Type: TokenEOF,
Bytes: []byte{},
Range: zcl.Range{
Start: zcl.Pos{Byte: 31, Line: 7, Column: 1},
End: zcl.Pos{Byte: 31, Line: 7, Column: 1},
},
},
},
},
// Combinations
{
` (1 + 2) * 3 `,

View File

@ -137,3 +137,8 @@ func (f *tokenAccum) emitToken(ty TokenType, startOfs, endOfs int) {
},
})
}
// heredocInProgress tracks one heredoc template that the scanner has
// opened but not yet closed. The scanner keeps a stack of these values
// while it is processing heredocs.
type heredocInProgress struct {
// Marker is the closing delimiter to look for: the identifier taken
// from the heredoc introducer, without the leading "<<" or "<<-" and
// without the trailing newline.
Marker []byte
// StartOfLine is true while the next literal scanned begins at the
// start of a line; the closing marker is only recognized in that state.
StartOfLine bool
}