zclsyntax: define the initial set of language tokens for the scanner

2017-05-27 19:00:00 -07:00 · 2017-05-27 19:00:00 -07:00 · e65eafbe83
commit e65eafbe83
parent 6bf26fc9cc
1 changed files with 119 additions and 0 deletions
--- a/zcl/zclsyntax/token.go
+++ b/zcl/zclsyntax/token.go
@ -0,0 +1,119 @@
 package zclsyntax
 import (
 	"github.com/apparentlymart/go-textseg/textseg"
 	"github.com/zclconf/go-zcl/zcl"
 )
 // Token represents a sequence of bytes from some zcl code that has been
 // tagged with a type and its range within the source file.
 type Token struct {
 	// Type identifies what kind of token this is; see the Token* constants.
 	Type  TokenType
 	// Bytes holds the raw source bytes of the token. makeToken sets this to
 	// a sub-slice of the factory's buffer rather than a copy.
 	Bytes []byte
 	// Range is the location of the token, including the filename and the
 	// start and end positions.
 	Range zcl.Range
 }
 // TokenType is an enumeration used for the Type field on Token.
 //
 // The underlying type is rune so that each token type can be given a
 // printable, mnemonic value; see the constants below.
 type TokenType rune
 const (
 	// Single-character tokens are represented by their own character, for
 	// convenience in producing these within the scanner. However, the values
 	// are otherwise arbitrary and just intended to be mnemonic for humans
 	// who might see them in debug output.
 	// Brackets and quotes.
 	TokenOBrace TokenType = '{'
 	TokenCBrace TokenType = '}'
 	TokenOBrack TokenType = '['
 	TokenCBrack TokenType = ']'
 	TokenOParen TokenType = '('
 	TokenCParen TokenType = ')'
 	TokenOQuote TokenType = '«'
 	TokenCQuote TokenType = '»'
 	// Attribute access and arithmetic operators.
 	TokenDot   TokenType = '.'
 	TokenStar  TokenType = '*'
 	TokenSlash TokenType = '/'
 	TokenPlus  TokenType = '+'
 	TokenMinus TokenType = '-'
 	// Equality and comparison operators.
 	TokenEqual         TokenType = '='
 	TokenNotEqual      TokenType = '≠'
 	TokenLessThan      TokenType = '<'
 	TokenLessThanEq    TokenType = '≤'
 	TokenGreaterThan   TokenType = '>'
 	TokenGreaterThanEq TokenType = '≥'
 	// Logical operators.
 	TokenAnd  TokenType = '∧'
 	TokenOr   TokenType = '∨'
 	TokenBang TokenType = '!'
 	// Conditional-expression punctuation.
 	TokenQuestion TokenType = '?'
 	TokenColon    TokenType = ':'
 	// Template sequence introducers.
 	TokenTemplateInterp  TokenType = '∫'
 	TokenTemplateControl TokenType = 'λ'
 	// Literals and identifiers.
 	TokenStringLit TokenType = 'S'
 	TokenHeredoc   TokenType = 'H'
 	TokenNumberLit TokenType = 'N'
 	TokenIdent     TokenType = 'I'
 	// Line and file terminators.
 	TokenNewline TokenType = '\n'
 	TokenEOF     TokenType = '␄'
 	// The rest are not used in the language but recognized by the scanner so
 	// we can generate good diagnostics in the parser when users try to write
 	// things that might work in other languages they are familiar with, or
 	// simply make incorrect assumptions about the zcl language.
 	TokenBitwiseAnd TokenType = '&'
 	TokenBitwiseOr  TokenType = '|'
 	TokenBitwiseNot TokenType = '~'
 	TokenBitwiseXor TokenType = '^'
 	TokenStarStar   TokenType = '➚'
 	TokenBacktick   TokenType = '`'
 	TokenSemicolon  TokenType = ';'
 	TokenTab        TokenType = '␉'
 	// TokenInvalid uses U+FFFD REPLACEMENT CHARACTER. It is written as an
 	// escape so the value survives tools that cannot render or re-encode
 	// the literal character.
 	TokenInvalid    TokenType = '\uFFFD'
 	TokenBadUTF8    TokenType = '💩'
 )
 // tokenFactory produces Token values for a buffer of source bytes, using
 // the position of the start of that buffer to compute each token's range.
 type tokenFactory struct {
 	// Filename is copied into the Range of every token produced.
 	Filename string
 	// Bytes is the buffer that the offsets passed to makeToken index into.
 	Bytes    []byte
 	// Start is the source position corresponding to offset zero of Bytes.
 	Start    zcl.Pos
 }
 // makeToken builds a Token of type ty whose bytes are
 // f.Bytes[startOfs:endOfs], and returns it together with the bytes that
 // remain in the buffer after endOfs.
 //
 // startOfs and endOfs are byte offsets into f.Bytes and must satisfy
 // 0 <= startOfs <= endOfs <= len(f.Bytes); otherwise the slice expressions
 // panic. The bytes before startOfs are assumed to be ASCII spaces, so each
 // one advances the start column by exactly one.
 //
 // The end position is derived from the start position by walking only the
 // token's own bytes one grapheme cluster at a time: a line break ("\n" or
 // "\r\n") moves to column 1 of the next line, and any other cluster
 // advances the column by one.
 func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, []byte) {
 	start := f.Start
 	start.Byte += startOfs
 	start.Column += startOfs // Safe because only ASCII spaces can be in the offset
 	end := start
 	end.Byte = f.Start.Byte + endOfs
 	// Walk just the token's bytes: the leading offset is already reflected
 	// in start, and anything after endOfs belongs to later tokens.
 	tokBytes := f.Bytes[startOfs:endOfs]
 	for b := tokBytes; len(b) > 0; {
 		// The error result is deliberately ignored; a malformed sequence
 		// still occupies a column. With atEOF set the splitter is expected
 		// to consume at least one byte of non-empty input, and the guard
 		// below prevents an endless loop should it ever not do so.
 		advance, seq, _ := textseg.ScanGraphemeClusters(b, true)
 		if advance <= 0 {
 			break
 		}
 		// "\r\n" is segmented as a single two-byte grapheme cluster, so it
 		// must be recognised explicitly alongside a bare "\n".
 		isLF := len(seq) == 1 && seq[0] == '\n'
 		isCRLF := len(seq) == 2 && seq[0] == '\r' && seq[1] == '\n'
 		if isLF || isCRLF {
 			end.Line++
 			end.Column = 1
 		} else {
 			end.Column++
 		}
 		b = b[advance:]
 	}
 	return Token{
 		Type:  ty,
 		Bytes: tokBytes,
 		Range: zcl.Range{
 			Filename: f.Filename,
 			Start:    start,
 			End:      end,
 		},
 	}, f.Bytes[endOfs:]
 }