2017-05-28 02:00:00 +00:00
|
|
|
|
package zclsyntax
|
|
|
|
|
|
|
|
|
|
import (
|
2017-05-28 14:38:17 +00:00
|
|
|
|
"fmt"
|
|
|
|
|
|
2017-05-28 02:00:00 +00:00
|
|
|
|
"github.com/apparentlymart/go-textseg/textseg"
|
|
|
|
|
"github.com/zclconf/go-zcl/zcl"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Token represents a sequence of bytes from some zcl code that has been
|
|
|
|
|
// tagged with a type and its range within the source file.
|
|
|
|
|
type Token struct {
|
|
|
|
|
Type TokenType
|
|
|
|
|
Bytes []byte
|
|
|
|
|
Range zcl.Range
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-29 23:17:07 +00:00
|
|
|
|
// Tokens is a slice of Token.
|
|
|
|
|
type Tokens []Token
|
|
|
|
|
|
2017-05-28 02:00:00 +00:00
|
|
|
|
// TokenType enumerates the kinds of token and is the type of the Type field
// on Token. Its underlying type is rune so that each kind can be written as
// a single character constant.
type TokenType rune
|
|
|
|
|
|
2017-05-28 14:38:17 +00:00
|
|
|
|
//go:generate stringer -type TokenType -output token_type_string.go
|
|
|
|
|
|
2017-05-28 02:00:00 +00:00
|
|
|
|
const (
|
|
|
|
|
// Single-character tokens are represented by their own character, for
|
|
|
|
|
// convenience in producing these within the scanner. However, the values
|
|
|
|
|
// are otherwise arbitrary and just intended to be mnemonic for humans
|
|
|
|
|
// who might see them in debug output.
|
|
|
|
|
|
2017-05-28 16:36:32 +00:00
|
|
|
|
TokenOBrace TokenType = '{'
|
|
|
|
|
TokenCBrace TokenType = '}'
|
|
|
|
|
TokenOBrack TokenType = '['
|
|
|
|
|
TokenCBrack TokenType = ']'
|
|
|
|
|
TokenOParen TokenType = '('
|
|
|
|
|
TokenCParen TokenType = ')'
|
|
|
|
|
TokenOQuote TokenType = '«'
|
|
|
|
|
TokenCQuote TokenType = '»'
|
|
|
|
|
TokenOHeredoc TokenType = 'H'
|
|
|
|
|
TokenCHeredoc TokenType = 'h'
|
2017-05-28 02:00:00 +00:00
|
|
|
|
|
2017-05-31 14:31:49 +00:00
|
|
|
|
TokenStar TokenType = '*'
|
|
|
|
|
TokenSlash TokenType = '/'
|
|
|
|
|
TokenPlus TokenType = '+'
|
|
|
|
|
TokenMinus TokenType = '-'
|
|
|
|
|
TokenPercent TokenType = '%'
|
2017-05-28 02:00:00 +00:00
|
|
|
|
|
|
|
|
|
TokenEqual TokenType = '='
|
|
|
|
|
TokenNotEqual TokenType = '≠'
|
|
|
|
|
TokenLessThan TokenType = '<'
|
|
|
|
|
TokenLessThanEq TokenType = '≤'
|
|
|
|
|
TokenGreaterThan TokenType = '>'
|
|
|
|
|
TokenGreaterThanEq TokenType = '≥'
|
|
|
|
|
|
|
|
|
|
TokenAnd TokenType = '∧'
|
|
|
|
|
TokenOr TokenType = '∨'
|
|
|
|
|
TokenBang TokenType = '!'
|
|
|
|
|
|
2017-06-02 14:40:42 +00:00
|
|
|
|
TokenDot TokenType = '.'
|
|
|
|
|
TokenComma TokenType = ','
|
2017-05-31 14:31:49 +00:00
|
|
|
|
|
2017-05-28 02:00:00 +00:00
|
|
|
|
TokenQuestion TokenType = '?'
|
|
|
|
|
TokenColon TokenType = ':'
|
|
|
|
|
|
|
|
|
|
TokenTemplateInterp TokenType = '∫'
|
|
|
|
|
TokenTemplateControl TokenType = 'λ'
|
2017-05-28 14:20:39 +00:00
|
|
|
|
TokenTemplateSeqEnd TokenType = '∎'
|
2017-05-28 02:00:00 +00:00
|
|
|
|
|
2017-05-31 02:03:25 +00:00
|
|
|
|
TokenQuotedLit TokenType = 'Q' // might contain backslash escapes
|
|
|
|
|
TokenStringLit TokenType = 'S' // cannot contain backslash escapes
|
2017-05-28 02:00:00 +00:00
|
|
|
|
TokenNumberLit TokenType = 'N'
|
|
|
|
|
TokenIdent TokenType = 'I'
|
|
|
|
|
|
2017-05-29 16:13:35 +00:00
|
|
|
|
TokenComment TokenType = 'C'
|
|
|
|
|
|
2017-05-28 02:00:00 +00:00
|
|
|
|
TokenNewline TokenType = '\n'
|
|
|
|
|
TokenEOF TokenType = '␄'
|
|
|
|
|
|
|
|
|
|
// The rest are not used in the language but recognized by the scanner so
|
|
|
|
|
// we can generate good diagnostics in the parser when users try to write
|
|
|
|
|
// things that might work in other languages they are familiar with, or
|
|
|
|
|
// simply make incorrect assumptions about the zcl language.
|
|
|
|
|
|
|
|
|
|
TokenBitwiseAnd TokenType = '&'
|
|
|
|
|
TokenBitwiseOr TokenType = '|'
|
|
|
|
|
TokenBitwiseNot TokenType = '~'
|
|
|
|
|
TokenBitwiseXor TokenType = '^'
|
|
|
|
|
TokenStarStar TokenType = '➚'
|
|
|
|
|
TokenBacktick TokenType = '`'
|
|
|
|
|
TokenSemicolon TokenType = ';'
|
2017-05-28 15:38:13 +00:00
|
|
|
|
TokenTabs TokenType = '␉'
|
2017-05-28 02:00:00 +00:00
|
|
|
|
TokenInvalid TokenType = '<27>'
|
|
|
|
|
TokenBadUTF8 TokenType = '💩'
|
2017-05-30 02:28:10 +00:00
|
|
|
|
|
|
|
|
|
// TokenNil is a placeholder for when a token is required but none is
|
|
|
|
|
// available, e.g. when reporting errors. The scanner will never produce
|
|
|
|
|
// this as part of a token stream.
|
|
|
|
|
TokenNil TokenType = '\x00'
|
2017-05-28 02:00:00 +00:00
|
|
|
|
)
|
|
|
|
|
|
2017-05-28 14:38:17 +00:00
|
|
|
|
func (t TokenType) GoString() string {
|
|
|
|
|
return fmt.Sprintf("zclsyntax.%s", t.String())
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-28 22:44:22 +00:00
|
|
|
|
// scanMode enumerates the modes a scan can run in.
// NOTE(review): the scanner that consumes these values is not in this part
// of the file; confirm the exact meaning of each mode there.
type scanMode int

const (
	// scanNormal is the zero value of scanMode.
	scanNormal scanMode = iota

	// scanTemplate presumably selects scanning of template content; verify
	// against the scanner.
	scanTemplate
)
|
|
|
|
|
|
2017-05-28 14:11:24 +00:00
|
|
|
|
type tokenAccum struct {
|
2017-05-28 02:00:00 +00:00
|
|
|
|
Filename string
|
|
|
|
|
Bytes []byte
|
2017-05-28 15:38:13 +00:00
|
|
|
|
Pos zcl.Pos
|
2017-05-28 14:11:24 +00:00
|
|
|
|
Tokens []Token
|
2017-05-28 02:00:00 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-05-28 15:38:13 +00:00
|
|
|
|
func (f *tokenAccum) emitToken(ty TokenType, startOfs, endOfs int) {
|
2017-05-28 02:00:00 +00:00
|
|
|
|
// Walk through our buffer to figure out how much we need to adjust
|
|
|
|
|
// the start pos to get our end pos.
|
|
|
|
|
|
2017-05-28 15:38:13 +00:00
|
|
|
|
start := f.Pos
|
|
|
|
|
start.Column += startOfs - f.Pos.Byte // Safe because only ASCII spaces can be in the offset
|
|
|
|
|
start.Byte = startOfs
|
2017-05-28 02:00:00 +00:00
|
|
|
|
|
|
|
|
|
end := start
|
2017-05-28 15:38:13 +00:00
|
|
|
|
end.Byte = endOfs
|
|
|
|
|
b := f.Bytes[startOfs:endOfs]
|
2017-05-28 02:00:00 +00:00
|
|
|
|
for len(b) > 0 {
|
|
|
|
|
advance, seq, _ := textseg.ScanGraphemeClusters(b, true)
|
|
|
|
|
if len(seq) == 1 && seq[0] == '\n' {
|
|
|
|
|
end.Line++
|
|
|
|
|
end.Column = 1
|
|
|
|
|
} else {
|
|
|
|
|
end.Column++
|
|
|
|
|
}
|
|
|
|
|
b = b[advance:]
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-28 15:38:13 +00:00
|
|
|
|
f.Pos = end
|
|
|
|
|
|
2017-05-28 14:11:24 +00:00
|
|
|
|
f.Tokens = append(f.Tokens, Token{
|
2017-05-28 02:00:00 +00:00
|
|
|
|
Type: ty,
|
|
|
|
|
Bytes: f.Bytes[startOfs:endOfs],
|
|
|
|
|
Range: zcl.Range{
|
|
|
|
|
Filename: f.Filename,
|
|
|
|
|
Start: start,
|
|
|
|
|
End: end,
|
|
|
|
|
},
|
2017-05-28 14:11:24 +00:00
|
|
|
|
})
|
2017-05-28 02:00:00 +00:00
|
|
|
|
}
|
2017-05-29 15:55:53 +00:00
|
|
|
|
|
|
|
|
|
// heredocInProgress holds state about a heredoc that is being scanned.
// NOTE(review): nothing in this part of the file uses this type; the field
// descriptions below are inferred from the names and should be confirmed
// against the scanner.
type heredocInProgress struct {
	// Marker is presumably the delimiter that ends the heredoc.
	Marker []byte

	// StartOfLine presumably records whether the scanner is currently at
	// the start of a line within the heredoc.
	StartOfLine bool
}
|
2017-06-04 14:34:26 +00:00
|
|
|
|
|
|
|
|
|
// checkInvalidTokens does a simple pass across the given tokens and generates
|
|
|
|
|
// diagnostics for tokens that should _never_ appear in ZCL source. This
|
|
|
|
|
// is intended to avoid the need for the parser to have special support
|
|
|
|
|
// for them all over.
|
|
|
|
|
//
|
|
|
|
|
// Returns a diagnostics with no errors if everything seems acceptable.
|
|
|
|
|
// Otherwise, returns zero or more error diagnostics, though tries to limit
|
|
|
|
|
// repetition of the same information.
|
|
|
|
|
func checkInvalidTokens(tokens Tokens) zcl.Diagnostics {
|
|
|
|
|
var diags zcl.Diagnostics
|
|
|
|
|
|
|
|
|
|
toldBitwise := 0
|
|
|
|
|
toldExponent := 0
|
|
|
|
|
toldBacktick := 0
|
|
|
|
|
toldSemicolon := 0
|
|
|
|
|
toldTabs := 0
|
|
|
|
|
toldBadUTF8 := 0
|
|
|
|
|
|
|
|
|
|
for _, tok := range tokens {
|
|
|
|
|
switch tok.Type {
|
|
|
|
|
case TokenBitwiseAnd, TokenBitwiseOr, TokenBitwiseXor, TokenBitwiseNot:
|
|
|
|
|
if toldBitwise < 4 {
|
|
|
|
|
var suggestion string
|
|
|
|
|
switch tok.Type {
|
|
|
|
|
case TokenBitwiseAnd:
|
|
|
|
|
suggestion = " Did you mean boolean AND (\"&&\")?"
|
|
|
|
|
case TokenBitwiseOr:
|
|
|
|
|
suggestion = " Did you mean boolean OR (\"&&\")?"
|
|
|
|
|
case TokenBitwiseNot:
|
|
|
|
|
suggestion = " Did you mean boolean NOT (\"!\")?"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
diags = append(diags, &zcl.Diagnostic{
|
|
|
|
|
Severity: zcl.DiagError,
|
|
|
|
|
Summary: "Unsupported operator",
|
|
|
|
|
Detail: fmt.Sprintf("Bitwise operators are not supported.%s", suggestion),
|
|
|
|
|
Subject: &tok.Range,
|
|
|
|
|
})
|
|
|
|
|
toldBitwise++
|
|
|
|
|
}
|
|
|
|
|
case TokenStarStar:
|
|
|
|
|
if toldExponent < 1 {
|
|
|
|
|
diags = append(diags, &zcl.Diagnostic{
|
|
|
|
|
Severity: zcl.DiagError,
|
|
|
|
|
Summary: "Unsupported operator",
|
|
|
|
|
Detail: "\"**\" is not a supported operator. Exponentiation is not supported as an operator.",
|
|
|
|
|
Subject: &tok.Range,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
toldExponent++
|
|
|
|
|
}
|
|
|
|
|
case TokenBacktick:
|
|
|
|
|
// Only report for alternating (even) backticks, so we won't report both start and ends of the same
|
|
|
|
|
// backtick-quoted string.
|
|
|
|
|
if toldExponent < 4 && (toldExponent%2) == 0 {
|
|
|
|
|
diags = append(diags, &zcl.Diagnostic{
|
|
|
|
|
Severity: zcl.DiagError,
|
|
|
|
|
Summary: "Invalid character",
|
|
|
|
|
Detail: "The \"`\" character is not valid. To create a multi-line string, use the \"heredoc\" syntax, like \"<<EOT\".",
|
|
|
|
|
Subject: &tok.Range,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
toldBacktick++
|
|
|
|
|
}
|
|
|
|
|
case TokenSemicolon:
|
|
|
|
|
if toldSemicolon < 1 {
|
|
|
|
|
diags = append(diags, &zcl.Diagnostic{
|
|
|
|
|
Severity: zcl.DiagError,
|
|
|
|
|
Summary: "Invalid character",
|
|
|
|
|
Detail: "The \";\" character is not valid. Use newlines to separate attributes and blocks, and commas to separate items in collection values.",
|
|
|
|
|
Subject: &tok.Range,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
toldSemicolon++
|
|
|
|
|
}
|
|
|
|
|
case TokenTabs:
|
|
|
|
|
if toldTabs < 1 {
|
|
|
|
|
diags = append(diags, &zcl.Diagnostic{
|
|
|
|
|
Severity: zcl.DiagError,
|
|
|
|
|
Summary: "Invalid character",
|
|
|
|
|
Detail: "Tab characters may not be used. The recommended indentation style is two spaces per indent.",
|
|
|
|
|
Subject: &tok.Range,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
toldTabs++
|
|
|
|
|
}
|
|
|
|
|
case TokenBadUTF8:
|
|
|
|
|
if toldBadUTF8 < 1 {
|
|
|
|
|
diags = append(diags, &zcl.Diagnostic{
|
|
|
|
|
Severity: zcl.DiagError,
|
|
|
|
|
Summary: "Invalid character encoding",
|
|
|
|
|
Detail: "All input files must be UTF-8 encoded. Ensure that UTF-8 encoding is selected in your editor.",
|
|
|
|
|
Subject: &tok.Range,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
toldBadUTF8++
|
|
|
|
|
}
|
|
|
|
|
case TokenInvalid:
|
|
|
|
|
diags = append(diags, &zcl.Diagnostic{
|
|
|
|
|
Severity: zcl.DiagError,
|
|
|
|
|
Summary: "Invalid character",
|
|
|
|
|
Detail: "This character is not used within the language.",
|
|
|
|
|
Subject: &tok.Range,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
toldTabs++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return diags
|
|
|
|
|
}
|