scanner: use a better token type

2015-10-07 12:20:35 +03:00 · 2015-10-07 12:20:35 +03:00 · 8169cb79d7
commit 8169cb79d7
parent 760a028e8a
2 changed files with 139 additions and 28 deletions
--- a/scanner/scanner.go
+++ b/scanner/scanner.go
@ -8,8 +8,6 @@ import (
 	"os"
 	"unicode"
 	"unicode/utf8"
-
-	"github.com/fatih/hcl/token"
 )

 // eof represents a marker rune for the end of the reader.
@ -126,7 +124,7 @@ func (s *Scanner) peek() rune {
 }

 // Scan scans the next token and returns the token.
-func (s *Scanner) Scan() token.Token {
+func (s *Scanner) Scan() Token {
 	ch := s.next()

 	// skip white space
@ -134,7 +132,7 @@ func (s *Scanner) Scan() token.Token {
 		ch = s.next()
 	}

-	var tok token.Token
+	var tok TokenType

 	// token text markings
 	s.tokBuf.Reset()
@ -157,54 +155,59 @@ func (s *Scanner) Scan() token.Token {

 	switch {
 	case isLetter(ch):
-		tok = token.IDENT
+		tok = IDENT
 		lit := s.scanIdentifier()
 		if lit == "true" || lit == "false" {
-			tok = token.BOOL
+			tok = BOOL
 		}
 	case isDecimal(ch):
 		tok = s.scanNumber(ch)
 	default:
 		switch ch {
 		case eof:
-			tok = token.EOF
+			tok = EOF
 		case '"':
-			tok = token.STRING
+			tok = STRING
 			s.scanString()
 		case '#', '/':
-			tok = token.COMMENT
+			tok = COMMENT
 			s.scanComment(ch)
 		case '.':
-			tok = token.PERIOD
+			tok = PERIOD
 			ch = s.peek()
 			if isDecimal(ch) {
-				tok = token.FLOAT
+				tok = FLOAT
 				ch = s.scanMantissa(ch)
 				ch = s.scanExponent(ch)
 			}
 		case '[':
-			tok = token.LBRACK
+			tok = LBRACK
 		case ']':
-			tok = token.RBRACK
+			tok = RBRACK
 		case '{':
-			tok = token.LBRACE
+			tok = LBRACE
 		case '}':
-			tok = token.RBRACE
+			tok = RBRACE
 		case ',':
-			tok = token.COMMA
+			tok = COMMA
 		case '=':
-			tok = token.ASSIGN
+			tok = ASSIGN
 		case '+':
-			tok = token.ADD
+			tok = ADD
 		case '-':
-			tok = token.SUB
+			tok = SUB
 		default:
 			s.err("illegal char")
 		}
 	}

 	s.tokEnd = s.srcPos.Offset
-	return tok
+
+	return Token{
+		token: tok,
+		pos:   s.tokPos,
+		text:  s.TokenText(),
+	}
 }

 // TokenText returns the literal string corresponding to the most recently
@ -261,7 +264,7 @@ func (s *Scanner) scanComment(ch rune) {
 }

 // scanNumber scans a HCL number definition starting with the given rune
-func (s *Scanner) scanNumber(ch rune) token.Token {
+func (s *Scanner) scanNumber(ch rune) TokenType {
 	if ch == '0' {
 		// check for hexadecimal, octal or float
 		ch = s.next()
@ -282,7 +285,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
 				s.unread()
 			}

-			return token.NUMBER
+			return NUMBER
 		}

 		// now it's either something like: 0421(octal) or 0.1231(float)
@ -300,7 +303,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
 		// literals of form 01e10 are treates as Numbers in HCL, which differs from Go.
 		if ch == 'e' || ch == 'E' {
 			ch = s.scanExponent(ch)
-			return token.NUMBER
+			return NUMBER
 		}

 		if ch == '.' {
@ -310,7 +313,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
 				ch = s.next()
 				ch = s.scanExponent(ch)
 			}
-			return token.FLOAT
+			return FLOAT
 		}

 		if illegalOctal {
@ -320,7 +323,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
 		if ch != eof {
 			s.unread()
 		}
-		return token.NUMBER
+		return NUMBER
 	}

 	s.scanMantissa(ch)
@ -328,7 +331,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
 	// literals of form 1e10 are treates as Numbers in HCL, which differs from Go.
 	if ch == 'e' || ch == 'E' {
 		ch = s.scanExponent(ch)
-		return token.NUMBER
+		return NUMBER
 	}

 	if ch == '.' {
@ -337,11 +340,11 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
 			ch = s.next()
 			ch = s.scanExponent(ch)
 		}
-		return token.FLOAT
+		return FLOAT
 	}

 	s.unread()
-	return token.NUMBER
+	return NUMBER
 }

 // scanMantissa scans the mantissa begining from the rune. It returns the next
--- a/scanner/token.go
+++ b/scanner/token.go
@ -0,0 +1,108 @@
+package scanner
+
+import "strconv"
+
+// Token defines a single HCL token which can be obtained via the Scanner
+type Token struct {
+	token TokenType
+	pos   Position
+	text  string
+}
+
+// TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
+type TokenType int
+
+const (
+	// Special tokens
+	ILLEGAL TokenType = iota
+	EOF
+	COMMENT
+
+	literal_beg
+	IDENT  // literals
+	NUMBER // 12345
+	FLOAT  // 123.45
+	BOOL   // true,false
+	STRING // "abc"
+	literal_end
+
+	operator_beg
+	LBRACK // [
+	LBRACE // {
+	COMMA  // ,
+	PERIOD // .
+
+	RBRACK // ]
+	RBRACE // }
+
+	ASSIGN // =
+	ADD    // +
+	SUB    // -
+	operator_end
+)
+
+var tokens = [...]string{
+	ILLEGAL: "ILLEGAL",
+
+	EOF:     "EOF",
+	COMMENT: "COMMENT",
+
+	IDENT:  "IDENT",
+	NUMBER: "NUMBER",
+	FLOAT:  "FLOAT",
+	BOOL:   "BOOL",
+	STRING: "STRING",
+
+	LBRACK: "LBRACK",
+	LBRACE: "LBRACE",
+	COMMA:  "COMMA",
+	PERIOD: "PERIOD",
+
+	RBRACK: "RBRACK",
+	RBRACE: "RBRACE",
+
+	ASSIGN: "ASSIGN",
+	ADD:    "ADD",
+	SUB:    "SUB",
+}
+
+// String returns the string corresponding to the token type t.
+// For every type listed in the tokens table the string is the name
+// of its constant (e.g. for the token ADD, the string is "ADD").
+// For any other value, including the unexported range markers, the
+// string has the form "token(N)", where N is the numeric value of t.
+func (t TokenType) String() string {
+	s := ""
+	if 0 <= t && t < TokenType(len(tokens)) {
+		s = tokens[t]
+	}
+	if s == "" {
+		s = "token(" + strconv.Itoa(int(t)) + ")"
+	}
+	return s
+}
+
+// IsLiteral returns true for tokens corresponding to identifiers and basic
+// type literals; it returns false otherwise.
+func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end }
+
+// IsOperator returns true for tokens corresponding to operators and
+// delimiters; it returns false otherwise.
+func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end }
+
+// Type returns the token's type
+func (t Token) Type() TokenType {
+	return t.token
+}
+
+// Pos returns the token's position
+func (t Token) Pos() Position {
+	return t.pos
+}
+
+// Text returns the token's literal text. Note that this is only
+// applicable for certain token types, such as the literal types
+// IDENT, STRING, etc.
+func (t Token) Text() string {
+	return t.text
+}