scanner: use a better token type

This commit is contained in:
Fatih Arslan 2015-10-07 12:20:35 +03:00
parent 760a028e8a
commit 8169cb79d7
2 changed files with 139 additions and 28 deletions

View File

@ -8,8 +8,6 @@ import (
"os"
"unicode"
"unicode/utf8"
"github.com/fatih/hcl/token"
)
// eof represents a marker rune for the end of the reader.
@ -126,7 +124,7 @@ func (s *Scanner) peek() rune {
}
// Scan scans the next token and returns the token.
func (s *Scanner) Scan() token.Token {
func (s *Scanner) Scan() Token {
ch := s.next()
// skip white space
@ -134,7 +132,7 @@ func (s *Scanner) Scan() token.Token {
ch = s.next()
}
var tok token.Token
var tok TokenType
// token text markings
s.tokBuf.Reset()
@ -157,54 +155,59 @@ func (s *Scanner) Scan() token.Token {
switch {
case isLetter(ch):
tok = token.IDENT
tok = IDENT
lit := s.scanIdentifier()
if lit == "true" || lit == "false" {
tok = token.BOOL
tok = BOOL
}
case isDecimal(ch):
tok = s.scanNumber(ch)
default:
switch ch {
case eof:
tok = token.EOF
tok = EOF
case '"':
tok = token.STRING
tok = STRING
s.scanString()
case '#', '/':
tok = token.COMMENT
tok = COMMENT
s.scanComment(ch)
case '.':
tok = token.PERIOD
tok = PERIOD
ch = s.peek()
if isDecimal(ch) {
tok = token.FLOAT
tok = FLOAT
ch = s.scanMantissa(ch)
ch = s.scanExponent(ch)
}
case '[':
tok = token.LBRACK
tok = LBRACK
case ']':
tok = token.RBRACK
tok = RBRACK
case '{':
tok = token.LBRACE
tok = LBRACE
case '}':
tok = token.RBRACE
tok = RBRACE
case ',':
tok = token.COMMA
tok = COMMA
case '=':
tok = token.ASSIGN
tok = ASSIGN
case '+':
tok = token.ADD
tok = ADD
case '-':
tok = token.SUB
tok = SUB
default:
s.err("illegal char")
}
}
s.tokEnd = s.srcPos.Offset
return tok
return Token{
token: tok,
pos: s.tokPos,
text: s.TokenText(),
}
}
// TokenText returns the literal string corresponding to the most recently
@ -261,7 +264,7 @@ func (s *Scanner) scanComment(ch rune) {
}
// scanNumber scans a HCL number definition starting with the given rune
func (s *Scanner) scanNumber(ch rune) token.Token {
func (s *Scanner) scanNumber(ch rune) TokenType {
if ch == '0' {
// check for hexadecimal, octal or float
ch = s.next()
@ -282,7 +285,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
s.unread()
}
return token.NUMBER
return NUMBER
}
// now it's either something like: 0421(octal) or 0.1231(float)
@ -300,7 +303,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
// literals of form 01e10 are treated as Numbers in HCL, which differs from Go.
if ch == 'e' || ch == 'E' {
ch = s.scanExponent(ch)
return token.NUMBER
return NUMBER
}
if ch == '.' {
@ -310,7 +313,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
ch = s.next()
ch = s.scanExponent(ch)
}
return token.FLOAT
return FLOAT
}
if illegalOctal {
@ -320,7 +323,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
if ch != eof {
s.unread()
}
return token.NUMBER
return NUMBER
}
s.scanMantissa(ch)
@ -328,7 +331,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
// literals of form 1e10 are treated as Numbers in HCL, which differs from Go.
if ch == 'e' || ch == 'E' {
ch = s.scanExponent(ch)
return token.NUMBER
return NUMBER
}
if ch == '.' {
@ -337,11 +340,11 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
ch = s.next()
ch = s.scanExponent(ch)
}
return token.FLOAT
return FLOAT
}
s.unread()
return token.NUMBER
return NUMBER
}
// scanMantissa scans the mantissa beginning from the rune. It returns the next

108
scanner/token.go Normal file
View File

@ -0,0 +1,108 @@
package scanner
import "strconv"
// Token defines a single HCL token which can be obtained via the Scanner.
// All fields are unexported; callers read them through the Type, Pos and
// Text methods.
type Token struct {
token TokenType // lexical category of the token
pos Position // position recorded for the token
text string // literal text of the token
}
// TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration
// Language). It is an integer enumeration whose values are the constants
// declared in this package.
type TokenType int
// The list of token types. The values are assigned sequentially with iota,
// so their order matters: the unexported *_beg / *_end markers bracket a
// group, and IsLiteral / IsOperator test membership with a strict range
// comparison against those markers.
const (
// Special tokens
ILLEGAL TokenType = iota // zero value of TokenType
EOF
COMMENT
literal_beg // marker: literal types follow (exclusive lower bound)
IDENT // literals
NUMBER // 12345
FLOAT // 123.45
BOOL // true,false
STRING // "abc"
literal_end // marker: end of literal types (exclusive upper bound)
operator_beg // marker: operators and delimiters follow (exclusive lower bound)
LBRACK // [
LBRACE // {
COMMA // ,
PERIOD // .
RBRACK // ]
RBRACE // }
ASSIGN // =
ADD // +
SUB // -
operator_end // marker: end of operators (exclusive upper bound)
)
// tokens maps a TokenType to its printable name and backs TokenType.String.
// It is a keyed array literal indexed by the token constants; slots that are
// not listed here (the literal_beg, literal_end and operator_beg markers)
// hold the empty string.
var tokens = [...]string{
ILLEGAL: "ILLEGAL",
EOF: "EOF",
COMMENT: "COMMENT",
IDENT: "IDENT",
NUMBER: "NUMBER",
FLOAT: "FLOAT",
BOOL: "BOOL",
STRING: "STRING",
LBRACK: "LBRACK",
LBRACE: "LBRACE",
COMMA: "COMMA",
PERIOD: "PERIOD",
RBRACK: "RBRACK",
RBRACE: "RBRACE",
ASSIGN: "ASSIGN",
ADD: "ADD",
SUB: "SUB",
}
// String returns the printable name of the token type t as listed in the
// tokens table, which is the constant's name (e.g. "IDENT" for IDENT and
// "ADD" for ADD). A value that has no name in the table, either because it
// is out of range or because its slot is empty, is rendered as "token(N)",
// where N is the decimal value of t.
func (t TokenType) String() string {
	inRange := t >= 0 && t < TokenType(len(tokens))
	if inRange {
		if name := tokens[t]; name != "" {
			return name
		}
	}
	// No name known for this value: fall back to the numeric form.
	return "token(" + strconv.Itoa(int(t)) + ")"
}
// IsLiteral reports whether t is an identifier or a basic type literal,
// i.e. whether it lies strictly between the literal_beg and literal_end
// markers.
func (t TokenType) IsLiteral() bool {
	return t > literal_beg && t < literal_end
}
// IsOperator reports whether t is an operator or delimiter, i.e. whether it
// lies strictly between the operator_beg and operator_end markers.
func (t TokenType) IsOperator() bool {
	return t > operator_beg && t < operator_end
}
// Type returns the token's type, i.e. the TokenType stored in the token.
func (t Token) Type() TokenType {
return t.token
}
// Pos returns the token's position as stored in the token.
func (t Token) Pos() Position {
return t.pos
}
// Text returns the token's literal text. Note that this is only
// applicable for certain token types, such as IDENT, STRING, etc.
func (t Token) Text() string {
return t.text
}