diff --git a/zcl/zclsyntax/token.go b/zcl/zclsyntax/token.go new file mode 100644 index 0000000..5c0e9db --- /dev/null +++ b/zcl/zclsyntax/token.go @@ -0,0 +1,119 @@ +package zclsyntax + +import ( + "github.com/apparentlymart/go-textseg/textseg" + "github.com/zclconf/go-zcl/zcl" +) + +// Token represents a sequence of bytes from some zcl code that has been +// tagged with a type and its range within the source file. +type Token struct { + Type TokenType + Bytes []byte + Range zcl.Range +} + +// TokenType is an enumeration used for the Type field on Token. +type TokenType rune + +const ( + // Single-character tokens are represented by their own character, for + // convenience in producing these within the scanner. However, the values + // are otherwise arbitrary and just intended to be mnemonic for humans + // who might see them in debug output. + + TokenOBrace TokenType = '{' + TokenCBrace TokenType = '}' + TokenOBrack TokenType = '[' + TokenCBrack TokenType = ']' + TokenOParen TokenType = '(' + TokenCParen TokenType = ')' + TokenOQuote TokenType = '«' + TokenCQuote TokenType = '»' + + TokenDot TokenType = '.' + TokenStar TokenType = '*' + TokenSlash TokenType = '/' + TokenPlus TokenType = '+' + TokenMinus TokenType = '-' + + TokenEqual TokenType = '=' + TokenNotEqual TokenType = '≠' + TokenLessThan TokenType = '<' + TokenLessThanEq TokenType = '≤' + TokenGreaterThan TokenType = '>' + TokenGreaterThanEq TokenType = '≥' + + TokenAnd TokenType = '∧' + TokenOr TokenType = '∨' + TokenBang TokenType = '!' + + TokenQuestion TokenType = '?' + TokenColon TokenType = ':' + + TokenTemplateInterp TokenType = '∫' + TokenTemplateControl TokenType = 'λ' + + TokenStringLit TokenType = 'S' + TokenHeredoc TokenType = 'H' + TokenNumberLit TokenType = 'N' + TokenIdent TokenType = 'I' + + TokenNewline TokenType = '\n' + TokenEOF TokenType = '␄' + + // The rest are not used in the language but recognized by the scanner so + // we can generate good diagnostics in the parser when users try to write + // things that might work in other languages they are familiar with, or + // simply make incorrect assumptions about the zcl language. + + TokenBitwiseAnd TokenType = '&' + TokenBitwiseOr TokenType = '|' + TokenBitwiseNot TokenType = '~' + TokenBitwiseXor TokenType = '^' + TokenStarStar TokenType = '➚' + TokenBacktick TokenType = '`' + TokenSemicolon TokenType = ';' + TokenTab TokenType = '␉' + TokenInvalid TokenType = '�' + TokenBadUTF8 TokenType = '💩' +) + +type tokenFactory struct { + Filename string + Bytes []byte + Start zcl.Pos +} + +func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, []byte) { + // Walk through our buffer to figure out how much we need to adjust + // the start pos to get our end pos. + + start := f.Start + start.Byte += startOfs + start.Column += startOfs // Safe because only ASCII spaces can be in the offset + + end := start + end.Byte = f.Start.Byte + endOfs + b := f.Bytes + for len(b) > 0 { + advance, seq, _ := textseg.ScanGraphemeClusters(b, true) + if len(seq) == 1 && seq[0] == '\n' { + end.Line++ + end.Column = 1 + } else { + end.Column++ + } + b = b[advance:] + } + + return Token{ + Type: ty, + Bytes: f.Bytes[startOfs:endOfs], + Range: zcl.Range{ + Filename: f.Filename, + Start: start, + End: end, + }, + }, f.Bytes[endOfs:] +}