zclsyntax: define the initial set of language tokens for the scanner

This commit is contained in:
Martin Atkins 2017-05-27 19:00:00 -07:00
parent 6bf26fc9cc
commit e65eafbe83

119
zcl/zclsyntax/token.go Normal file
View File

@ -0,0 +1,119 @@
package zclsyntax
import (
"github.com/apparentlymart/go-textseg/textseg"
"github.com/zclconf/go-zcl/zcl"
)
// Token represents a sequence of bytes from some zcl code that has been
// tagged with a type and its range within the source file.
//
// Tokens built by tokenFactory.makeToken carry a Bytes slice that is a
// sub-slice of the factory's buffer rather than a copy, so it shares that
// buffer's backing array.
type Token struct {
// Type identifies which kind of token this is; see the TokenType constants.
Type TokenType
// Bytes holds the raw source bytes that make up the token.
Bytes []byte
// Range gives the filename and the start and end positions of the token.
Range zcl.Range
}
// TokenType is an enumeration used for the Type field on Token.
//
// Its underlying type is rune so that single-character tokens can use the
// character itself as their value.
type TokenType rune

const (
	// Single-character tokens are represented by their own character, for
	// convenience in producing these within the scanner. However, the values
	// are otherwise arbitrary and just intended to be mnemonic for humans
	// who might see them in debug output.

	TokenOBrace TokenType = '{'
	TokenCBrace TokenType = '}'
	TokenOBrack TokenType = '['
	TokenCBrack TokenType = ']'
	TokenOParen TokenType = '('
	TokenCParen TokenType = ')'
	TokenOQuote TokenType = '«'
	TokenCQuote TokenType = '»'

	TokenDot   TokenType = '.'
	TokenStar  TokenType = '*'
	TokenSlash TokenType = '/'
	TokenPlus  TokenType = '+'
	TokenMinus TokenType = '-'

	TokenEqual         TokenType = '='
	TokenNotEqual      TokenType = '≠'
	TokenLessThan      TokenType = '<'
	TokenLessThanEq    TokenType = '≤'
	TokenGreaterThan   TokenType = '>'
	TokenGreaterThanEq TokenType = '≥'

	// TokenOr is U+2228 LOGICAL OR, the counterpart of the U+2227 LOGICAL AND
	// used for TokenAnd.
	TokenAnd  TokenType = '∧'
	TokenOr   TokenType = '∨'
	TokenBang TokenType = '!'

	TokenQuestion TokenType = '?'
	TokenColon    TokenType = ':'

	TokenTemplateInterp  TokenType = '∫'
	TokenTemplateControl TokenType = 'λ'

	TokenStringLit TokenType = 'S'
	TokenHeredoc   TokenType = 'H'
	TokenNumberLit TokenType = 'N'
	TokenIdent     TokenType = 'I'

	TokenNewline TokenType = '\n'
	TokenEOF     TokenType = '␄'

	// The rest are not used in the language but recognized by the scanner so
	// we can generate good diagnostics in the parser when users try to write
	// things that might work in other languages they are familiar with, or
	// simply make incorrect assumptions about the zcl language.

	TokenBitwiseAnd TokenType = '&'
	TokenBitwiseOr  TokenType = '|'
	TokenBitwiseNot TokenType = '~'
	TokenBitwiseXor TokenType = '^'
	TokenStarStar   TokenType = '➚'
	TokenBacktick   TokenType = '`'
	TokenSemicolon  TokenType = ';'
	TokenTab        TokenType = '␉'

	// TokenInvalid is U+FFFD REPLACEMENT CHARACTER. It is written as an
	// escape so the literal survives tools that mangle that character.
	TokenInvalid TokenType = '\uFFFD'
	TokenBadUTF8 TokenType = '💩'
)
// tokenFactory holds the context makeToken needs to turn byte offsets within
// a buffer into Tokens with source ranges.
type tokenFactory struct {
// Filename is copied into the Range of every token produced.
Filename string
// Bytes is the buffer that makeToken's startOfs and endOfs index into.
Bytes []byte
// Start is the source position corresponding to offset 0 of Bytes.
Start zcl.Pos
}
// makeToken builds a Token of type ty covering f.Bytes[startOfs:endOfs] and
// returns it together with the remainder of the buffer, f.Bytes[endOfs:].
//
// startOfs and endOfs are byte offsets into f.Bytes. The bytes before
// startOfs are skipped; they are assumed to be single-column ASCII characters
// so that the start column can be advanced by the byte count alone.
//
// The returned Token's Bytes field aliases f.Bytes rather than copying it.
func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, []byte) {
	start := f.Start
	start.Byte += startOfs
	start.Column += startOfs // Safe because only ASCII spaces can be in the offset

	end := start
	end.Byte = f.Start.Byte + endOfs

	// Walk through only the token's own bytes to work out how far the end
	// position is from the start position. The skipped prefix is already
	// accounted for in start, and anything after endOfs belongs to later
	// tokens, so neither may contribute to end.
	b := f.Bytes[startOfs:endOfs]
	for len(b) > 0 {
		// NOTE(review): the error result is ignored, as it was originally;
		// the splitter is not expected to fail when atEOF is true — confirm.
		advance, seq, _ := textseg.ScanGraphemeClusters(b, true)

		// Unicode grapheme segmentation keeps CR LF together as a single
		// cluster, so both newline forms must be recognized here.
		switch string(seq) {
		case "\n", "\r\n":
			end.Line++
			end.Column = 1
		default:
			end.Column++
		}
		b = b[advance:]
	}

	return Token{
		Type:  ty,
		Bytes: f.Bytes[startOfs:endOfs],
		Range: zcl.Range{
			Filename: f.Filename,
			Start:    start,
			End:      end,
		},
	}, f.Bytes[endOfs:]
}