From 8169cb79d7ea1ec5d6bd88556137b955c574dcb2 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 12:20:35 +0300 Subject: [PATCH] scanner: use a better token type --- scanner/scanner.go | 59 +++++++++++++------------ scanner/token.go | 108 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 28 deletions(-) create mode 100644 scanner/token.go diff --git a/scanner/scanner.go b/scanner/scanner.go index 385d7bb..fd96004 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -8,8 +8,6 @@ import ( "os" "unicode" "unicode/utf8" - - "github.com/fatih/hcl/token" ) // eof represents a marker rune for the end of the reader. @@ -126,7 +124,7 @@ func (s *Scanner) peek() rune { } // Scan scans the next token and returns the token. -func (s *Scanner) Scan() token.Token { +func (s *Scanner) Scan() Token { ch := s.next() // skip white space @@ -134,7 +132,7 @@ func (s *Scanner) Scan() token.Token { ch = s.next() } - var tok token.Token + var tok TokenType // token text markings s.tokBuf.Reset() @@ -157,54 +155,59 @@ func (s *Scanner) Scan() token.Token { switch { case isLetter(ch): - tok = token.IDENT + tok = IDENT lit := s.scanIdentifier() if lit == "true" || lit == "false" { - tok = token.BOOL + tok = BOOL } case isDecimal(ch): tok = s.scanNumber(ch) default: switch ch { case eof: - tok = token.EOF + tok = EOF case '"': - tok = token.STRING + tok = STRING s.scanString() case '#', '/': - tok = token.COMMENT + tok = COMMENT s.scanComment(ch) case '.': - tok = token.PERIOD + tok = PERIOD ch = s.peek() if isDecimal(ch) { - tok = token.FLOAT + tok = FLOAT ch = s.scanMantissa(ch) ch = s.scanExponent(ch) } case '[': - tok = token.LBRACK + tok = LBRACK case ']': - tok = token.RBRACK + tok = RBRACK case '{': - tok = token.LBRACE + tok = LBRACE case '}': - tok = token.RBRACE + tok = RBRACE case ',': - tok = token.COMMA + tok = COMMA case '=': - tok = token.ASSIGN + tok = ASSIGN case '+': - tok = token.ADD + tok = ADD case '-': - tok = 
token.SUB + tok = SUB default: s.err("illegal char") } } s.tokEnd = s.srcPos.Offset - return tok + + return Token{ + token: tok, + pos: s.tokPos, + text: s.TokenText(), + } } // TokenText returns the literal string corresponding to the most recently @@ -261,7 +264,7 @@ func (s *Scanner) scanComment(ch rune) { } // scanNumber scans a HCL number definition starting with the given rune -func (s *Scanner) scanNumber(ch rune) token.Token { +func (s *Scanner) scanNumber(ch rune) TokenType { if ch == '0' { // check for hexadecimal, octal or float ch = s.next() @@ -282,7 +285,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { s.unread() } - return token.NUMBER + return NUMBER } // now it's either something like: 0421(octal) or 0.1231(float) @@ -300,7 +303,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { ch = s.scanExponent(ch) - return token.NUMBER + return NUMBER } if ch == '.' { @@ -310,7 +313,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { ch = s.next() ch = s.scanExponent(ch) } - return token.FLOAT + return FLOAT } if illegalOctal { @@ -320,7 +323,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { if ch != eof { s.unread() } - return token.NUMBER + return NUMBER } s.scanMantissa(ch) @@ -328,7 +331,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { // literals of form 1e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { ch = s.scanExponent(ch) - return token.NUMBER + return NUMBER } if ch == '.' { @@ -337,11 +340,11 @@ func (s *Scanner) scanNumber(ch rune) token.Token { ch = s.next() ch = s.scanExponent(ch) } - return token.FLOAT + return FLOAT } s.unread() - return token.NUMBER + return NUMBER } // scanMantissa scans the mantissa begining from the rune. 
It returns the next diff --git a/scanner/token.go b/scanner/token.go new file mode 100644 index 0000000..9d31f27 --- /dev/null +++ b/scanner/token.go @@ -0,0 +1,108 @@ +package scanner + +import "strconv" + +// Token defines a single HCL token which can be obtained via the Scanner +type Token struct { + token TokenType + pos Position + text string +} + +// TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language) +type TokenType int + +const ( + // Special tokens + ILLEGAL TokenType = iota + EOF + COMMENT + + literal_beg + IDENT // literals + NUMBER // 12345 + FLOAT // 123.45 + BOOL // true,false + STRING // "abc" + literal_end + + operator_beg + LBRACK // [ + LBRACE // { + COMMA // , + PERIOD // . + + RBRACK // ] + RBRACE // } + + ASSIGN // = + ADD // + + SUB // - + operator_end +) + +var tokens = [...]string{ + ILLEGAL: "ILLEGAL", + + EOF: "EOF", + COMMENT: "COMMENT", + + IDENT: "IDENT", + NUMBER: "NUMBER", + FLOAT: "FLOAT", + BOOL: "BOOL", + STRING: "STRING", + + LBRACK: "LBRACK", + LBRACE: "LBRACE", + COMMA: "COMMA", + PERIOD: "PERIOD", + + RBRACK: "RBRACK", + RBRACE: "RBRACE", + + ASSIGN: "ASSIGN", + ADD: "ADD", + SUB: "SUB", +} + +// String returns the name of the token type t, which is the name of its +// constant: "IDENT" for IDENT, "ADD" for ADD (the name, not the operator +// character). A value without an entry in the tokens table, such as +// the literal_beg/operator_beg markers or an out-of-range value, is +// rendered as "token(N)", where N is the decimal value of t. +func (t TokenType) String() string { + s := "" + if 0 <= t && t < TokenType(len(tokens)) { + s = tokens[t] + } + if s == "" { + s = "token(" + strconv.Itoa(int(t)) + ")" + } + return s +} + +// IsLiteral returns true for tokens corresponding to identifiers and basic +// type literals; it returns false otherwise.
+func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end } + +// IsOperator returns true for tokens corresponding to operators and +// delimiters; it returns false otherwise. +func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end } + +// Type returns the token's type. +func (t Token) Type() TokenType { + return t.token +} + +// Pos returns the token's position. +func (t Token) Pos() Position { + return t.pos +} + +// Text returns the token's literal text. Note that this is only +// applicable for certain token types, such as IDENT, STRING, +// etc. +func (t Token) Text() string { + return t.text +}