package hclsyntax

import (
    "bytes"

    "github.com/hashicorp/hcl/v2"
)

// This file is generated from scan_tokens.rl. DO NOT EDIT.
%%{
    # (except when you are actually in scan_tokens.rl here, so edit away!)

    machine hcltok;
    write data;
}%%

func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
    stripData := stripUTF8BOM(data)
    start.Byte += len(data) - len(stripData)
    data = stripData
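    // For example, if the input starts with the UTF-8 byte order mark
    // 0xEF 0xBB 0xBF, stripData is three bytes shorter than data, so
    // start.Byte advances by three and token positions remain correct
    // relative to the original, BOM-included buffer.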

    f := &tokenAccum{
        Filename:  filename,
        Bytes:     data,
        Pos:       start,
        StartByte: start.Byte,
    }

    %%{
        include UnicodeDerived "unicode_derived.rl";

        UTF8Cont = 0x80 .. 0xBF;
        AnyUTF8 = (
            0x00..0x7F |
            0xC0..0xDF . UTF8Cont |
            0xE0..0xEF . UTF8Cont . UTF8Cont |
            0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
        );
        BrokenUTF8 = any - AnyUTF8;
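
        # For example, the two-byte sequence 0xC3 0xA9 ("é") matches the
        # second alternative of AnyUTF8, while a lone continuation byte such
        # as 0x80 with no lead byte can only match BrokenUTF8.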

        NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
        NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
        Ident = (ID_Start | '_') (ID_Continue | '-')*;
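
        # For example, NumberLit matches "0", "42", "3.14159" and "1e-6" but
        # never ends in ".", so "1." scans as the number "1" followed by a
        # "." self token. Ident matches names like "foo", "_tmp" and
        # "instance-count", but cannot start with a digit or a dash.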

        # Symbols that just represent themselves are handled as a single rule.
        SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`" | "'";

        EqualOp = "==";
        NotEqual = "!=";
        GreaterThanEqual = ">=";
        LessThanEqual = "<=";
        LogicalAnd = "&&";
        LogicalOr = "||";

        Ellipsis = "...";
        FatArrow = "=>";

        Newline = '\r' ? '\n';
        EndOfLine = Newline;

        BeginStringTmpl = '"';
        BeginHeredocTmpl = '<<' ('-')? Ident Newline;
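
        # For example, both "<<EOT" and the indented form "<<-EOT", each
        # followed by a newline, match BeginHeredocTmpl; the identifier after
        # "<<" is what beginHeredocTemplate below records as the closing
        # marker.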

        Comment = (
            # The :>> operator in these is a "finish-guarded concatenation",
            # which terminates the sequence on its left when it completes
            # the sequence on its right.
            # In the single-line comment cases this allows us to make
            # the trailing EndOfLine optional while still having the overall
            # pattern terminate. In the multi-line case it ensures that
            # the first comment in the file ends at the first */, rather than
            # gobbling up all of the "any*" until the _final_ */ in the file.
            ("#" (any - EndOfLine)* :>> EndOfLine?) |
            ("//" (any - EndOfLine)* :>> EndOfLine?) |
            ("/*" any* :>> "*/")
        );

        # Note: hclwrite assumes that only ASCII spaces appear between tokens,
        # and uses this assumption to recreate the spaces between tokens by
        # looking at byte offset differences. This means it will produce
        # incorrect results in the presence of tabs, but that's acceptable
        # because the canonical style (which hclwrite itself can impose
        # automatically) is to never use tabs.
        Spaces = (' ' | 0x09)+;
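
        # (For example, if one token ends at byte offset 10 and the next
        # begins at offset 12, hclwrite will re-create two ASCII spaces
        # between them when rendering.)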

        action beginStringTemplate {
            token(TokenOQuote);
            fcall stringTemplate;
        }

        action endStringTemplate {
            token(TokenCQuote);
            fret;
        }
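
        # For example, for the input "hi ${name}" (including the surrounding
        # quotes) the scanner emits, in order: TokenOQuote, TokenQuotedLit
        # ("hi "), TokenTemplateInterp ("${"), TokenIdent ("name"),
        # TokenTemplateSeqEnd ("}") and TokenCQuote. The fcall/fret pairs in
        # these actions are what move the scanner between the main and
        # stringTemplate machines while that happens.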

        action beginHeredocTemplate {
            token(TokenOHeredoc);
            // the token is currently the whole heredoc introducer, like
            // <<EOT or <<-EOT, followed by a newline. We want to extract
            // just the "EOT" portion that we'll use as the closing marker.
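            // For example, for the introducer "<<-EOT" followed by "\r\n",
            // data[ts+2:te-1] is "-EOT\r", and the two checks below trim the
            // leading "-" and trailing "\r" to leave just "EOT".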

            marker := data[ts+2:te-1]
            if marker[0] == '-' {
                marker = marker[1:]
            }
            if marker[len(marker)-1] == '\r' {
                marker = marker[:len(marker)-1]
            }

            heredocs = append(heredocs, heredocInProgress{
                Marker:      marker,
                StartOfLine: true,
            })

            fcall heredocTemplate;
        }

        action heredocLiteralEOL {
            // This action is called specifically when a heredoc literal
            // ends with a newline character.

            // This might actually be our end marker.
            topdoc := &heredocs[len(heredocs)-1]
            if topdoc.StartOfLine {
                maybeMarker := bytes.TrimSpace(data[ts:te])
                if bytes.Equal(maybeMarker, topdoc.Marker) {
                    // We actually emit two tokens here: the end-of-heredoc
                    // marker first, and then separately the newline that
                    // follows it. This then avoids issues with the closing
                    // marker consuming a newline that would normally be used
                    // to mark the end of an attribute definition.
                    // We might have either a \n sequence or an \r\n sequence
                    // here, so we must handle both.
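                    // For example, if this literal is "EOT\r\n" then
                    // TokenCHeredoc should cover only "EOT" and TokenNewline
                    // should cover "\r\n", so we shrink te before emitting
                    // the first token and then re-point ts/te at the line
                    // ending for the second.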
                    nls := te-1
                    nle := te
                    te--
                    if data[te-1] == '\r' {
                        // back up one more byte
                        nls--
                        te--
                    }
                    token(TokenCHeredoc);
                    ts = nls
                    te = nle
                    token(TokenNewline);
                    heredocs = heredocs[:len(heredocs)-1]
                    fret;
                }
            }

            topdoc.StartOfLine = true;
            token(TokenStringLit);
        }

        action heredocLiteralMidline {
            // This action is called when a heredoc literal _doesn't_ end
            // with a newline character, e.g. because we're about to enter
            // an interpolation sequence.
            heredocs[len(heredocs)-1].StartOfLine = false;
            token(TokenStringLit);
        }

        action bareTemplateLiteral {
            token(TokenStringLit);
        }

        action beginTemplateInterp {
            token(TokenTemplateInterp);
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        action beginTemplateControl {
            token(TokenTemplateControl);
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        action openBrace {
            token(TokenOBrace);
            braces++;
        }

        action closeBrace {
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                token(TokenCBrace);
                braces--;
            }
        }
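
        # For example, in the interpolation "${jsonencode({a = 1})}" the "{"
        # of the object constructor is an ordinary TokenOBrace and its partner
        # is TokenCBrace, because at that point braces is deeper than the
        # level recorded in retBraces; only the final "}" matches the recorded
        # level, so it becomes TokenTemplateSeqEnd and fret returns us to the
        # template machine.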

        action closeTemplateSeqEatWhitespace {
            // Only consume from the retBraces stack and return if we are at
            // a suitable brace nesting level, otherwise things will get
            // confused. (Not entering this branch indicates a syntax error,
            // which we will catch in the parser.)
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                // We intentionally generate a TokenTemplateSeqEnd here,
                // even though the user apparently wanted a brace, because
                // we want to allow the parser to catch the incorrect use
                // of a ~} to balance a generic opening brace, rather than
                // a template sequence.
                token(TokenTemplateSeqEnd);
                braces--;
            }
        }
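
        # For example, a control sequence written as "%{ if enabled ~}" ends
        # with "~}" instead of "}": the scanner still emits
        # TokenTemplateSeqEnd for it, and the "~" is a request to trim the
        # whitespace that follows in the template, which is handled later,
        # outside this scanner.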

        TemplateInterp = "${" ("~")?;
        TemplateControl = "%{" ("~")?;
        EndStringTmpl = '"';
        NewlineChars = ("\r"|"\n");
        NewlineCharsSeq = NewlineChars+;
        StringLiteralChars = (AnyUTF8 - NewlineChars);
        TemplateIgnoredNonBrace = (^'{' %{ fhold; });
        TemplateNotInterp = '$' (TemplateIgnoredNonBrace | TemplateInterp);
        TemplateNotControl = '%' (TemplateIgnoredNonBrace | TemplateControl);
        QuotedStringLiteralWithEsc = ('\\' StringLiteralChars) | (StringLiteralChars - ("$" | '%' | '"' | "\\"));
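
        # For example, in a quoted string containing "$5.00" the "$" is not
        # followed by "{", so TemplateNotInterp takes the
        # TemplateIgnoredNonBrace branch: the "$" is kept as literal text and
        # fhold pushes the following character back to be scanned again,
        # rather than beginning an interpolation.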

        TemplateStringLiteral = (
            (TemplateNotInterp) |
            (TemplateNotControl) |
            (QuotedStringLiteralWithEsc)+
        );
        HeredocStringLiteral = (
            (TemplateNotInterp) |
            (TemplateNotControl) |
            (StringLiteralChars - ("$" | '%'))*
        );
        BareStringLiteral = (
            (TemplateNotInterp) |
            (TemplateNotControl) |
            (StringLiteralChars - ("$" | '%'))*
        ) Newline?;

        stringTemplate := |*
            TemplateInterp => beginTemplateInterp;
            TemplateControl => beginTemplateControl;
            EndStringTmpl => endStringTemplate;
            TemplateStringLiteral => { token(TokenQuotedLit); };
            NewlineCharsSeq => { token(TokenQuotedNewline); };
            AnyUTF8 => { token(TokenInvalid); };
            BrokenUTF8 => { token(TokenBadUTF8); };
        *|;

        heredocTemplate := |*
            TemplateInterp => beginTemplateInterp;
            TemplateControl => beginTemplateControl;
            HeredocStringLiteral EndOfLine => heredocLiteralEOL;
            HeredocStringLiteral => heredocLiteralMidline;
            BrokenUTF8 => { token(TokenBadUTF8); };
        *|;

        bareTemplate := |*
            TemplateInterp => beginTemplateInterp;
            TemplateControl => beginTemplateControl;
            BareStringLiteral => bareTemplateLiteral;
            BrokenUTF8 => { token(TokenBadUTF8); };
        *|;

        identOnly := |*
            Ident => { token(TokenIdent) };
            BrokenUTF8 => { token(TokenBadUTF8) };
            AnyUTF8 => { token(TokenInvalid) };
        *|;

        main := |*
            Spaces => {};
            NumberLit => { token(TokenNumberLit) };
            Ident => { token(TokenIdent) };

            Comment => { token(TokenComment) };
            Newline => { token(TokenNewline) };

            EqualOp => { token(TokenEqualOp); };
            NotEqual => { token(TokenNotEqual); };
            GreaterThanEqual => { token(TokenGreaterThanEq); };
            LessThanEqual => { token(TokenLessThanEq); };
            LogicalAnd => { token(TokenAnd); };
            LogicalOr => { token(TokenOr); };
            Ellipsis => { token(TokenEllipsis); };
            FatArrow => { token(TokenFatArrow); };
            SelfToken => { selfToken() };

            "{" => openBrace;
            "}" => closeBrace;

            "~}" => closeTemplateSeqEatWhitespace;

            BeginStringTmpl => beginStringTemplate;
            BeginHeredocTmpl => beginHeredocTemplate;

            BrokenUTF8 => { token(TokenBadUTF8) };
            AnyUTF8 => { token(TokenInvalid) };
        *|;
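
        # For example, the line "count = 3 # three" followed by a newline
        # scans here as TokenIdent ("count"), a self token for "=" (whose
        # token type is the character itself), TokenNumberLit ("3") and a
        # single TokenComment covering "# three" plus the trailing newline,
        # while the spaces in between produce no tokens at all.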

    }%%

    // Ragel state
    p := 0 // "Pointer" into data
    pe := len(data) // End-of-data "pointer"
    ts := 0
    te := 0
    act := 0
    eof := pe
    var stack []int
    var top int

    var cs int // current state
    switch mode {
    case scanNormal:
        cs = hcltok_en_main
    case scanTemplate:
        cs = hcltok_en_bareTemplate
    case scanIdentOnly:
        cs = hcltok_en_identOnly
    default:
        panic("invalid scanMode")
    }

    braces := 0
    var retBraces []int // stack of brace levels that cause us to use fret
    var heredocs []heredocInProgress // stack of heredocs we're currently processing

    %%{
        prepush {
            stack = append(stack, 0);
        }
        postpop {
            stack = stack[:len(stack)-1];
        }
    }%%

    // Make Go compiler happy
    _ = ts
    _ = te
    _ = act
    _ = eof

    token := func (ty TokenType) {
        f.emitToken(ty, ts, te)
    }
    selfToken := func () {
        b := data[ts:te]
        if len(b) != 1 {
            // should never happen
            panic("selfToken only works for single-character tokens")
        }
        f.emitToken(TokenType(b[0]), ts, te)
    }

    %%{
        write init nocs;
        write exec;
    }%%

    // If we fall out here without being in a final state then we've
    // encountered something that the scanner can't match, which we'll
    // deal with as an invalid token.
    if cs < hcltok_first_final {
        if mode == scanTemplate && len(stack) == 0 {
            // If we're scanning a bare template then any straggling
            // top-level stuff is actually literal string, rather than
            // invalid. This handles the case where the template ends
            // with a single "$" or "%", which trips us up because we
            // want to see another character to decide if it's a sequence
            // or an escape.
            f.emitToken(TokenStringLit, ts, len(data))
        } else {
            f.emitToken(TokenInvalid, ts, len(data))
        }
    }

    // We always emit a synthetic EOF token at the end, since it gives the
    // parser position information for an "unexpected EOF" diagnostic.
    f.emitToken(TokenEOF, len(data), len(data))
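    // For example, for the three-byte input "a=1" the EOF token spans the
    // zero-length range from byte offset 3 to 3, just after the final real
    // token, which is the position an "unexpected EOF" diagnostic points at.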

    return f.Tokens
}