zclsyntax: define the initial set of language tokens for the scanner
This commit is contained in:
parent 6bf26fc9cc
commit e65eafbe83
zcl/zclsyntax/token.go (new file, 119 lines)
@@ -0,0 +1,119 @@
package zclsyntax

import (
	"github.com/apparentlymart/go-textseg/textseg"
	"github.com/zclconf/go-zcl/zcl"
)

// Token represents a sequence of bytes from some zcl code that has been
// tagged with a type and its range within the source file.
type Token struct {
	Type  TokenType
	Bytes []byte
	Range zcl.Range
}

// TokenType is an enumeration used for the Type field on Token.
type TokenType rune

const (
	// Single-character tokens are represented by their own character, for
	// convenience in producing these within the scanner. However, the values
	// are otherwise arbitrary and just intended to be mnemonic for humans
	// who might see them in debug output.

	TokenOBrace TokenType = '{'
	TokenCBrace TokenType = '}'
	TokenOBrack TokenType = '['
	TokenCBrack TokenType = ']'
	TokenOParen TokenType = '('
	TokenCParen TokenType = ')'
	TokenOQuote TokenType = '«'
	TokenCQuote TokenType = '»'

	TokenDot   TokenType = '.'
	TokenStar  TokenType = '*'
	TokenSlash TokenType = '/'
	TokenPlus  TokenType = '+'
	TokenMinus TokenType = '-'

	TokenEqual         TokenType = '='
	TokenNotEqual      TokenType = '≠'
	TokenLessThan      TokenType = '<'
	TokenLessThanEq    TokenType = '≤'
	TokenGreaterThan   TokenType = '>'
	TokenGreaterThanEq TokenType = '≥'

	TokenAnd  TokenType = '∧'
	TokenOr   TokenType = '∨'
	TokenBang TokenType = '!'

	TokenQuestion TokenType = '?'
	TokenColon    TokenType = ':'

	TokenTemplateInterp  TokenType = '∫'
	TokenTemplateControl TokenType = 'λ'

	TokenStringLit TokenType = 'S'
	TokenHeredoc   TokenType = 'H'
	TokenNumberLit TokenType = 'N'
	TokenIdent     TokenType = 'I'

	TokenNewline TokenType = '\n'
	TokenEOF     TokenType = '␄'

	// The rest are not used in the language but recognized by the scanner so
	// we can generate good diagnostics in the parser when users try to write
	// things that might work in other languages they are familiar with, or
	// simply make incorrect assumptions about the zcl language.

	TokenBitwiseAnd TokenType = '&'
	TokenBitwiseOr  TokenType = '|'
	TokenBitwiseNot TokenType = '~'
	TokenBitwiseXor TokenType = '^'
	TokenStarStar   TokenType = '➚'
	TokenBacktick   TokenType = '`'
	TokenSemicolon  TokenType = ';'
	TokenTab        TokenType = '␉'
	TokenInvalid    TokenType = '�'
	TokenBadUTF8    TokenType = '💩'
)

type tokenFactory struct {
	Filename string
	Bytes    []byte
	Start    zcl.Pos
}

// makeToken constructs a Token of the given type from the bytes between
// startOfs and endOfs in the factory's buffer, returning it along with the
// remaining bytes that follow the token.
func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, []byte) {
	// Walk through our buffer to figure out how much we need to adjust
	// the start pos to get our end pos.

	start := f.Start
	start.Byte += startOfs
	start.Column += startOfs // Safe because only ASCII spaces can be in the offset

	end := start
	end.Byte = f.Start.Byte + endOfs
	b := f.Bytes[startOfs:endOfs] // count lines/columns only over the token's own bytes
	for len(b) > 0 {
		advance, seq, _ := textseg.ScanGraphemeClusters(b, true)
		if len(seq) == 1 && seq[0] == '\n' {
			end.Line++
			end.Column = 1
		} else {
			end.Column++
		}
		b = b[advance:]
	}

	return Token{
		Type:  ty,
		Bytes: f.Bytes[startOfs:endOfs],
		Range: zcl.Range{
			Filename: f.Filename,
			Start:    start,
			End:      end,
		},
	}, f.Bytes[endOfs:]
}
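
Because each single-character token type is literally its own character, a TokenType value prints as a readable mnemonic. The short, self-contained sketch below is not part of the commit; the program and its output formatting are illustrative only, showing the "mnemonic for humans in debug output" idea from the comments above.

package main

import "fmt"

// TokenType mirrors the declaration in token.go: each value is a rune chosen
// to read as a mnemonic when printed.
type TokenType rune

const (
	TokenOBrace   TokenType = '{'
	TokenNotEqual TokenType = '≠'
	TokenEOF      TokenType = '␄'
)

func main() {
	// Printing the underlying rune makes scanner traces easy to read.
	for _, ty := range []TokenType{TokenOBrace, TokenNotEqual, TokenEOF} {
		fmt.Printf("token type %q (U+%04X)\n", rune(ty), rune(ty))
	}
}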
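
The tokenFactory is presumably consumed by a scanner that later commits add. As a rough usage sketch (a hypothetical caller named scanIdentExample, not part of this commit), a scanner that has matched an identifier in the first three bytes of its buffer could emit a token like this:

// scanIdentExample is a hypothetical helper within package zclsyntax,
// shown only to illustrate how makeToken is meant to be called.
func scanIdentExample(filename string, src []byte) (Token, []byte) {
	f := tokenFactory{
		Filename: filename,
		Bytes:    src,
		Start:    zcl.Pos{Line: 1, Column: 1, Byte: 0},
	}

	// Suppose the scanner matched an identifier occupying src[0:3].
	// makeToken slices out those bytes, derives the token's Range by
	// walking grapheme clusters from the start position, and returns
	// the rest of the buffer so scanning can continue after the token.
	tok, remain := f.makeToken(TokenIdent, 0, 3)
	return tok, remain
}

The returned remain slice aliases the original buffer, so the caller would presumably build a fresh tokenFactory with an updated Start position before scanning the next token.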