json: scanner and token

This commit is contained in:
Mitchell Hashimoto 2015-11-07 18:14:32 -08:00
parent 338eebdbf7
commit 87a91d1019
5 changed files with 1102 additions and 0 deletions

449
json/scanner/scanner.go Normal file
View File

@ -0,0 +1,449 @@
package scanner
import (
"bytes"
"fmt"
"os"
"unicode"
"unicode/utf8"
"github.com/hashicorp/hcl/json/token"
)
// eof represents a marker rune for the end of the reader.
const eof = rune(0)
// Scanner defines a lexical scanner
type Scanner struct {
buf *bytes.Buffer // Source buffer for advancing and scanning
src []byte // Source buffer for immutable access
// Source Position
srcPos token.Pos // current position
prevPos token.Pos // previous position, used for peek() method
lastCharLen int // length of last character in bytes
lastLineLen int // length of last line in characters (for correct column reporting)
tokStart int // token text start position
tokEnd int // token text end position
// Error is called for each error encountered. If no Error
// function is set, the error is reported to os.Stderr.
Error func(pos token.Pos, msg string)
// ErrorCount is incremented by one for each error encountered.
ErrorCount int
// tokPos is the start position of most recently scanned token; set by
// Scan. The Filename field is always left untouched by the Scanner. If
// an error is reported (via Error) and Position is invalid, the scanner is
// not inside a token.
tokPos token.Pos
}
// New creates and initializes a new instance of Scanner using src as
// its source content.
func New(src []byte) *Scanner {
// even though we accept a src, we read from a io.Reader compatible type
// (*bytes.Buffer). So in the future we might easily change it to streaming
// read.
b := bytes.NewBuffer(src)
s := &Scanner{
buf: b,
src: src,
}
// srcPosition always starts with 1
s.srcPos.Line = 1
return s
}
// next reads the next rune from the bufferred reader. Returns the rune(0) if
// an error occurs (or io.EOF is returned).
func (s *Scanner) next() rune {
ch, size, err := s.buf.ReadRune()
if err != nil {
// advance for error reporting
s.srcPos.Column++
s.srcPos.Offset += size
s.lastCharLen = size
return eof
}
if ch == utf8.RuneError && size == 1 {
s.srcPos.Column++
s.srcPos.Offset += size
s.lastCharLen = size
s.err("illegal UTF-8 encoding")
return ch
}
// remember last position
s.prevPos = s.srcPos
s.srcPos.Column++
s.lastCharLen = size
s.srcPos.Offset += size
if ch == '\n' {
s.srcPos.Line++
s.lastLineLen = s.srcPos.Column
s.srcPos.Column = 0
}
// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch
}
// unread unreads the previous read Rune and updates the source position
func (s *Scanner) unread() {
if err := s.buf.UnreadRune(); err != nil {
panic(err) // this is user fault, we should catch it
}
s.srcPos = s.prevPos // put back last position
}
// peek returns the next rune without advancing the reader.
func (s *Scanner) peek() rune {
peek, _, err := s.buf.ReadRune()
if err != nil {
return eof
}
s.buf.UnreadRune()
return peek
}
// Scan scans the next token and returns the token.
func (s *Scanner) Scan() token.Token {
ch := s.next()
// skip white space
for isWhitespace(ch) {
ch = s.next()
}
var tok token.Type
// token text markings
s.tokStart = s.srcPos.Offset - s.lastCharLen
// token position, initial next() is moving the offset by one(size of rune
// actually), though we are interested with the starting point
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
if s.srcPos.Column > 0 {
// common case: last character was not a '\n'
s.tokPos.Line = s.srcPos.Line
s.tokPos.Column = s.srcPos.Column
} else {
// last character was a '\n'
// (we cannot be at the beginning of the source
// since we have called next() at least once)
s.tokPos.Line = s.srcPos.Line - 1
s.tokPos.Column = s.lastLineLen
}
switch {
case isLetter(ch):
lit := s.scanIdentifier()
if lit == "true" || lit == "false" {
tok = token.BOOL
} else if lit == "null" {
tok = token.NULL
} else {
s.err("illegal char")
}
case isDecimal(ch):
tok = s.scanNumber(ch)
default:
switch ch {
case eof:
tok = token.EOF
case '"':
tok = token.STRING
s.scanString()
case '.':
tok = token.PERIOD
ch = s.peek()
if isDecimal(ch) {
tok = token.FLOAT
ch = s.scanMantissa(ch)
ch = s.scanExponent(ch)
}
case '[':
tok = token.LBRACK
case ']':
tok = token.RBRACK
case '{':
tok = token.LBRACE
case '}':
tok = token.RBRACE
case ',':
tok = token.COMMA
case '-':
if isDecimal(s.peek()) {
ch := s.next()
tok = s.scanNumber(ch)
} else {
s.err("illegal char")
}
default:
s.err("illegal char")
}
}
// finish token ending
s.tokEnd = s.srcPos.Offset
// create token literal
var tokenText string
if s.tokStart >= 0 {
tokenText = string(s.src[s.tokStart:s.tokEnd])
}
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
return token.Token{
Type: tok,
Pos: s.tokPos,
Text: tokenText,
}
}
// scanNumber scans a HCL number definition starting with the given rune
func (s *Scanner) scanNumber(ch rune) token.Type {
zero := ch == '0'
pos := s.srcPos
s.scanMantissa(ch)
ch = s.next() // seek forward
if ch == 'e' || ch == 'E' {
ch = s.scanExponent(ch)
return token.FLOAT
}
if ch == '.' {
ch = s.scanFraction(ch)
if ch == 'e' || ch == 'E' {
ch = s.next()
ch = s.scanExponent(ch)
}
return token.FLOAT
}
if ch != eof {
s.unread()
}
// If we have a larger number and this is zero, error
if zero && pos != s.srcPos {
s.err("numbers cannot start with 0")
}
return token.NUMBER
}
// scanMantissa scans the mantissa begining from the rune. It returns the next
// non decimal rune. It's used to determine wheter it's a fraction or exponent.
func (s *Scanner) scanMantissa(ch rune) rune {
scanned := false
for isDecimal(ch) {
ch = s.next()
scanned = true
}
if scanned && ch != eof {
s.unread()
}
return ch
}
// scanFraction scans the fraction after the '.' rune
func (s *Scanner) scanFraction(ch rune) rune {
if ch == '.' {
ch = s.peek() // we peek just to see if we can move forward
ch = s.scanMantissa(ch)
}
return ch
}
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
// rune.
func (s *Scanner) scanExponent(ch rune) rune {
if ch == 'e' || ch == 'E' {
ch = s.next()
if ch == '-' || ch == '+' {
ch = s.next()
}
ch = s.scanMantissa(ch)
}
return ch
}
// scanString scans a quoted string
func (s *Scanner) scanString() {
braces := 0
for {
// '"' opening already consumed
// read character after quote
ch := s.next()
if ch == '\n' || ch < 0 || ch == eof {
s.err("literal not terminated")
return
}
if ch == '"' && braces == 0 {
break
}
// If we're going into a ${} then we can ignore quotes for awhile
if braces == 0 && ch == '$' && s.peek() == '{' {
braces++
s.next()
} else if braces > 0 && ch == '{' {
braces++
}
if braces > 0 && ch == '}' {
braces--
}
if ch == '\\' {
s.scanEscape()
}
}
return
}
// scanEscape scans an escape sequence
func (s *Scanner) scanEscape() rune {
// http://en.cppreference.com/w/cpp/language/escape
ch := s.next() // read character after '/'
switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
// nothing to do
case '0', '1', '2', '3', '4', '5', '6', '7':
// octal notation
ch = s.scanDigits(ch, 8, 3)
case 'x':
// hexademical notation
ch = s.scanDigits(s.next(), 16, 2)
case 'u':
// universal character name
ch = s.scanDigits(s.next(), 16, 4)
case 'U':
// universal character name
ch = s.scanDigits(s.next(), 16, 8)
default:
s.err("illegal char escape")
}
return ch
}
// scanDigits scans a rune with the given base for n times. For example an
// octal notation \184 would yield in scanDigits(ch, 8, 3)
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
for n > 0 && digitVal(ch) < base {
ch = s.next()
n--
}
if n > 0 {
s.err("illegal char escape")
}
// we scanned all digits, put the last non digit char back
s.unread()
return ch
}
// scanIdentifier scans an identifier and returns the literal string
func (s *Scanner) scanIdentifier() string {
offs := s.srcPos.Offset - s.lastCharLen
ch := s.next()
for isLetter(ch) || isDigit(ch) || ch == '-' {
ch = s.next()
}
if ch != eof {
s.unread() // we got identifier, put back latest char
}
return string(s.src[offs:s.srcPos.Offset])
}
// recentPosition returns the position of the character immediately after the
// character or token returned by the last call to Scan.
func (s *Scanner) recentPosition() (pos token.Pos) {
pos.Offset = s.srcPos.Offset - s.lastCharLen
switch {
case s.srcPos.Column > 0:
// common case: last character was not a '\n'
pos.Line = s.srcPos.Line
pos.Column = s.srcPos.Column
case s.lastLineLen > 0:
// last character was a '\n'
// (we cannot be at the beginning of the source
// since we have called next() at least once)
pos.Line = s.srcPos.Line - 1
pos.Column = s.lastLineLen
default:
// at the beginning of the source
pos.Line = 1
pos.Column = 1
}
return
}
// err prints the error of any scanning to s.Error function. If the function is
// not defined, by default it prints them to os.Stderr
func (s *Scanner) err(msg string) {
s.ErrorCount++
pos := s.recentPosition()
if s.Error != nil {
s.Error(pos, msg)
return
}
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}
// isHexadecimal returns true if the given rune is a letter
func isLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}
// isHexadecimal returns true if the given rune is a decimal digit
func isDigit(ch rune) bool {
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
}
// isHexadecimal returns true if the given rune is a decimal number
func isDecimal(ch rune) bool {
return '0' <= ch && ch <= '9'
}
// isHexadecimal returns true if the given rune is an hexadecimal number
func isHexadecimal(ch rune) bool {
return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
}
// isWhitespace returns true if the rune is a space, tab, newline or carriage return
func isWhitespace(ch rune) bool {
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
}
// digitVal returns the integer value of a given octal,decimal or hexadecimal rune
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
return int(ch - '0')
case 'a' <= ch && ch <= 'f':
return int(ch - 'a' + 10)
case 'A' <= ch && ch <= 'F':
return int(ch - 'A' + 10)
}
return 16 // larger than any legal digit val
}

View File

@ -0,0 +1,411 @@
package scanner
import (
"bytes"
"fmt"
"testing"
"github.com/hashicorp/hcl/json/token"
)
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
type tokenPair struct {
tok token.Type
text string
}
var tokenLists = map[string][]tokenPair{
"operator": []tokenPair{
{token.LBRACK, "["},
{token.LBRACE, "{"},
{token.COMMA, ","},
{token.PERIOD, "."},
{token.RBRACK, "]"},
{token.RBRACE, "}"},
},
"bool": []tokenPair{
{token.BOOL, "true"},
{token.BOOL, "false"},
},
"string": []tokenPair{
{token.STRING, `" "`},
{token.STRING, `"a"`},
{token.STRING, `"本"`},
{token.STRING, `"${file("foo")}"`},
{token.STRING, `"\a"`},
{token.STRING, `"\b"`},
{token.STRING, `"\f"`},
{token.STRING, `"\n"`},
{token.STRING, `"\r"`},
{token.STRING, `"\t"`},
{token.STRING, `"\v"`},
{token.STRING, `"\""`},
{token.STRING, `"\000"`},
{token.STRING, `"\777"`},
{token.STRING, `"\x00"`},
{token.STRING, `"\xff"`},
{token.STRING, `"\u0000"`},
{token.STRING, `"\ufA16"`},
{token.STRING, `"\U00000000"`},
{token.STRING, `"\U0000ffAB"`},
{token.STRING, `"` + f100 + `"`},
},
"number": []tokenPair{
{token.NUMBER, "0"},
{token.NUMBER, "1"},
{token.NUMBER, "9"},
{token.NUMBER, "42"},
{token.NUMBER, "1234567890"},
{token.NUMBER, "-0"},
{token.NUMBER, "-1"},
{token.NUMBER, "-9"},
{token.NUMBER, "-42"},
{token.NUMBER, "-1234567890"},
},
"float": []tokenPair{
{token.FLOAT, "0."},
{token.FLOAT, "1."},
{token.FLOAT, "42."},
{token.FLOAT, "01234567890."},
{token.FLOAT, ".0"},
{token.FLOAT, ".1"},
{token.FLOAT, ".42"},
{token.FLOAT, ".0123456789"},
{token.FLOAT, "0.0"},
{token.FLOAT, "1.0"},
{token.FLOAT, "42.0"},
{token.FLOAT, "01234567890.0"},
{token.FLOAT, "0e0"},
{token.FLOAT, "1e0"},
{token.FLOAT, "42e0"},
{token.FLOAT, "01234567890e0"},
{token.FLOAT, "0E0"},
{token.FLOAT, "1E0"},
{token.FLOAT, "42E0"},
{token.FLOAT, "01234567890E0"},
{token.FLOAT, "0e+10"},
{token.FLOAT, "1e-10"},
{token.FLOAT, "42e+10"},
{token.FLOAT, "01234567890e-10"},
{token.FLOAT, "0E+10"},
{token.FLOAT, "1E-10"},
{token.FLOAT, "42E+10"},
{token.FLOAT, "01234567890E-10"},
{token.FLOAT, "01.8e0"},
{token.FLOAT, "1.4e0"},
{token.FLOAT, "42.2e0"},
{token.FLOAT, "01234567890.12e0"},
{token.FLOAT, "0.E0"},
{token.FLOAT, "1.12E0"},
{token.FLOAT, "42.123E0"},
{token.FLOAT, "01234567890.213E0"},
{token.FLOAT, "0.2e+10"},
{token.FLOAT, "1.2e-10"},
{token.FLOAT, "42.54e+10"},
{token.FLOAT, "01234567890.98e-10"},
{token.FLOAT, "0.1E+10"},
{token.FLOAT, "1.1E-10"},
{token.FLOAT, "42.1E+10"},
{token.FLOAT, "01234567890.1E-10"},
{token.FLOAT, "-0.0"},
{token.FLOAT, "-1.0"},
{token.FLOAT, "-42.0"},
{token.FLOAT, "-01234567890.0"},
{token.FLOAT, "-0e0"},
{token.FLOAT, "-1e0"},
{token.FLOAT, "-42e0"},
{token.FLOAT, "-01234567890e0"},
{token.FLOAT, "-0E0"},
{token.FLOAT, "-1E0"},
{token.FLOAT, "-42E0"},
{token.FLOAT, "-01234567890E0"},
{token.FLOAT, "-0e+10"},
{token.FLOAT, "-1e-10"},
{token.FLOAT, "-42e+10"},
{token.FLOAT, "-01234567890e-10"},
{token.FLOAT, "-0E+10"},
{token.FLOAT, "-1E-10"},
{token.FLOAT, "-42E+10"},
{token.FLOAT, "-01234567890E-10"},
{token.FLOAT, "-01.8e0"},
{token.FLOAT, "-1.4e0"},
{token.FLOAT, "-42.2e0"},
{token.FLOAT, "-01234567890.12e0"},
{token.FLOAT, "-0.E0"},
{token.FLOAT, "-1.12E0"},
{token.FLOAT, "-42.123E0"},
{token.FLOAT, "-01234567890.213E0"},
{token.FLOAT, "-0.2e+10"},
{token.FLOAT, "-1.2e-10"},
{token.FLOAT, "-42.54e+10"},
{token.FLOAT, "-01234567890.98e-10"},
{token.FLOAT, "-0.1E+10"},
{token.FLOAT, "-1.1E-10"},
{token.FLOAT, "-42.1E+10"},
{token.FLOAT, "-01234567890.1E-10"},
},
}
var orderedTokenLists = []string{
"comment",
"operator",
"bool",
"string",
"number",
"float",
}
func TestPosition(t *testing.T) {
// create artifical source code
buf := new(bytes.Buffer)
for _, listName := range orderedTokenLists {
for _, ident := range tokenLists[listName] {
fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text)
}
}
s := New(buf.Bytes())
pos := token.Pos{"", 4, 1, 5}
s.Scan()
for _, listName := range orderedTokenLists {
for _, k := range tokenLists[listName] {
curPos := s.tokPos
// fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column)
if curPos.Offset != pos.Offset {
t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text)
}
if curPos.Line != pos.Line {
t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text)
}
if curPos.Column != pos.Column {
t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text)
}
pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline
pos.Line += countNewlines(k.text) + 1 // each token is on a new line
s.Error = func(pos token.Pos, msg string) {
t.Errorf("error %q for %q", msg, k.text)
}
s.Scan()
}
}
// make sure there were no token-internal errors reported by scanner
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
}
func TestComment(t *testing.T) {
testTokenList(t, tokenLists["comment"])
}
func TestOperator(t *testing.T) {
testTokenList(t, tokenLists["operator"])
}
func TestBool(t *testing.T) {
testTokenList(t, tokenLists["bool"])
}
func TestIdent(t *testing.T) {
testTokenList(t, tokenLists["ident"])
}
func TestString(t *testing.T) {
testTokenList(t, tokenLists["string"])
}
func TestNumber(t *testing.T) {
testTokenList(t, tokenLists["number"])
}
func TestFloat(t *testing.T) {
testTokenList(t, tokenLists["float"])
}
/*
func TestRealExample(t *testing.T) {
complexHCL := `// This comes from Terraform, as a test
variable "foo" {
default = "bar"
description = "bar"
}
provider "aws" {
access_key = "foo"
secret_key = "bar"
}
resource "aws_security_group" "firewall" {
count = 5
}
resource aws_instance "web" {
ami = "${var.foo}"
security_groups = [
"foo",
"${aws_security_group.firewall.foo}"
]
network_interface {
device_index = 0
description = "Main network interface"
}
}`
literals := []struct {
tokenType token.Type
literal string
}{
{token.COMMENT, `// This comes from Terraform, as a test`},
{token.IDENT, `variable`},
{token.STRING, `"foo"`},
{token.LBRACE, `{`},
{token.IDENT, `default`},
{token.ASSIGN, `=`},
{token.STRING, `"bar"`},
{token.IDENT, `description`},
{token.ASSIGN, `=`},
{token.STRING, `"bar"`},
{token.RBRACE, `}`},
{token.IDENT, `provider`},
{token.STRING, `"aws"`},
{token.LBRACE, `{`},
{token.IDENT, `access_key`},
{token.ASSIGN, `=`},
{token.STRING, `"foo"`},
{token.IDENT, `secret_key`},
{token.ASSIGN, `=`},
{token.STRING, `"bar"`},
{token.RBRACE, `}`},
{token.IDENT, `resource`},
{token.STRING, `"aws_security_group"`},
{token.STRING, `"firewall"`},
{token.LBRACE, `{`},
{token.IDENT, `count`},
{token.ASSIGN, `=`},
{token.NUMBER, `5`},
{token.RBRACE, `}`},
{token.IDENT, `resource`},
{token.IDENT, `aws_instance`},
{token.STRING, `"web"`},
{token.LBRACE, `{`},
{token.IDENT, `ami`},
{token.ASSIGN, `=`},
{token.STRING, `"${var.foo}"`},
{token.IDENT, `security_groups`},
{token.ASSIGN, `=`},
{token.LBRACK, `[`},
{token.STRING, `"foo"`},
{token.COMMA, `,`},
{token.STRING, `"${aws_security_group.firewall.foo}"`},
{token.RBRACK, `]`},
{token.IDENT, `network_interface`},
{token.LBRACE, `{`},
{token.IDENT, `device_index`},
{token.ASSIGN, `=`},
{token.NUMBER, `0`},
{token.IDENT, `description`},
{token.ASSIGN, `=`},
{token.STRING, `"Main network interface"`},
{token.RBRACE, `}`},
{token.RBRACE, `}`},
{token.EOF, ``},
}
s := New([]byte(complexHCL))
for _, l := range literals {
tok := s.Scan()
if l.tokenType != tok.Type {
t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String())
}
if l.literal != tok.Text {
t.Errorf("got: %s want %s\n", tok, l.literal)
}
}
}
*/
func TestError(t *testing.T) {
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
testError(t, `01238`, "1:7", "numbers cannot start with 0", token.NUMBER)
testError(t, `01238123`, "1:10", "numbers cannot start with 0", token.NUMBER)
testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL)
testError(t, `"`, "1:2", "literal not terminated", token.STRING)
testError(t, `"abc`, "1:5", "literal not terminated", token.STRING)
testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING)
}
func testError(t *testing.T, src, pos, msg string, tok token.Type) {
s := New([]byte(src))
errorCalled := false
s.Error = func(p token.Pos, m string) {
if !errorCalled {
if pos != p.String() {
t.Errorf("pos = %q, want %q for %q", p, pos, src)
}
if m != msg {
t.Errorf("msg = %q, want %q for %q", m, msg, src)
}
errorCalled = true
}
}
tk := s.Scan()
if tk.Type != tok {
t.Errorf("tok = %s, want %s for %q", tk, tok, src)
}
if !errorCalled {
t.Errorf("error handler not called for %q", src)
}
if s.ErrorCount == 0 {
t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
}
}
func testTokenList(t *testing.T, tokenList []tokenPair) {
// create artifical source code
buf := new(bytes.Buffer)
for _, ident := range tokenList {
fmt.Fprintf(buf, "%s\n", ident.text)
}
s := New(buf.Bytes())
for _, ident := range tokenList {
tok := s.Scan()
if tok.Type != ident.tok {
t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text)
}
if tok.Text != ident.text {
t.Errorf("text = %q want %q", tok.String(), ident.text)
}
}
}
func countNewlines(s string) int {
n := 0
for _, ch := range s {
if ch == '\n' {
n++
}
}
return n
}

46
json/token/position.go Normal file
View File

@ -0,0 +1,46 @@
package token
import "fmt"
// Pos describes an arbitrary source position
// including the file, line, and column location.
// A Position is valid if the line number is > 0.
type Pos struct {
Filename string // filename, if any
Offset int // offset, starting at 0
Line int // line number, starting at 1
Column int // column number, starting at 1 (character count)
}
// IsValid returns true if the position is valid.
func (p *Pos) IsValid() bool { return p.Line > 0 }
// String returns a string in one of several forms:
//
// file:line:column valid position with file name
// line:column valid position without file name
// file invalid position with file name
// - invalid position without file name
func (p Pos) String() string {
s := p.Filename
if p.IsValid() {
if s != "" {
s += ":"
}
s += fmt.Sprintf("%d:%d", p.Line, p.Column)
}
if s == "" {
s = "-"
}
return s
}
// Before reports whether the position p is before u.
func (p Pos) Before(u Pos) bool {
return u.Offset > p.Offset || u.Line > p.Line
}
// After reports whether the position p is after u.
func (p Pos) After(u Pos) bool {
return u.Offset < p.Offset || u.Line < p.Line
}

139
json/token/token.go Normal file
View File

@ -0,0 +1,139 @@
package token
import (
"fmt"
"strconv"
hclstrconv "github.com/hashicorp/hcl/hcl/strconv"
)
// Token defines a single HCL token which can be obtained via the Scanner
type Token struct {
Type Type
Pos Pos
Text string
}
// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
type Type int
const (
// Special tokens
ILLEGAL Type = iota
EOF
identifier_beg
literal_beg
NUMBER // 12345
FLOAT // 123.45
BOOL // true,false
STRING // "abc"
NULL // null
literal_end
identifier_end
operator_beg
LBRACK // [
LBRACE // {
COMMA // ,
PERIOD // .
RBRACK // ]
RBRACE // }
operator_end
)
var tokens = [...]string{
ILLEGAL: "ILLEGAL",
EOF: "EOF",
NUMBER: "NUMBER",
FLOAT: "FLOAT",
BOOL: "BOOL",
STRING: "STRING",
NULL: "NULL",
LBRACK: "LBRACK",
LBRACE: "LBRACE",
COMMA: "COMMA",
PERIOD: "PERIOD",
RBRACK: "RBRACK",
RBRACE: "RBRACE",
}
// String returns the string corresponding to the token tok.
func (t Type) String() string {
s := ""
if 0 <= t && t < Type(len(tokens)) {
s = tokens[t]
}
if s == "" {
s = "token(" + strconv.Itoa(int(t)) + ")"
}
return s
}
// IsIdentifier returns true for tokens corresponding to identifiers and basic
// type literals; it returns false otherwise.
func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end }
// IsLiteral returns true for tokens corresponding to basic type literals; it
// returns false otherwise.
func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end }
// IsOperator returns true for tokens corresponding to operators and
// delimiters; it returns false otherwise.
func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end }
// String returns the token's literal text. Note that this is only
// applicable for certain token types, such as token.IDENT,
// token.STRING, etc..
func (t Token) String() string {
return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text)
}
// Value returns the properly typed value for this token. The type of
// the returned interface{} is guaranteed based on the Type field.
//
// This can only be called for literal types. If it is called for any other
// type, this will panic.
func (t Token) Value() interface{} {
switch t.Type {
case BOOL:
if t.Text == "true" {
return true
} else if t.Text == "false" {
return false
}
panic("unknown bool value: " + t.Text)
case FLOAT:
v, err := strconv.ParseFloat(t.Text, 64)
if err != nil {
panic(err)
}
return float64(v)
case NULL:
return nil
case NUMBER:
v, err := strconv.ParseInt(t.Text, 0, 64)
if err != nil {
panic(err)
}
return int64(v)
case STRING:
v, err := hclstrconv.Unquote(t.Text)
if err != nil {
panic(fmt.Sprintf("unquote %s err: %s", t.Text, err))
}
return v
default:
panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
}
}

57
json/token/token_test.go Normal file
View File

@ -0,0 +1,57 @@
package token
import (
"reflect"
"testing"
)
func TestTypeString(t *testing.T) {
var tokens = []struct {
tt Type
str string
}{
{ILLEGAL, "ILLEGAL"},
{EOF, "EOF"},
{NUMBER, "NUMBER"},
{FLOAT, "FLOAT"},
{BOOL, "BOOL"},
{STRING, "STRING"},
{NULL, "NULL"},
{LBRACK, "LBRACK"},
{LBRACE, "LBRACE"},
{COMMA, "COMMA"},
{PERIOD, "PERIOD"},
{RBRACK, "RBRACK"},
{RBRACE, "RBRACE"},
}
for _, token := range tokens {
if token.tt.String() != token.str {
t.Errorf("want: %q got:%q\n", token.str, token.tt)
}
}
}
func TestTokenValue(t *testing.T) {
var tokens = []struct {
tt Token
v interface{}
}{
{Token{Type: BOOL, Text: `true`}, true},
{Token{Type: BOOL, Text: `false`}, false},
{Token{Type: FLOAT, Text: `3.14`}, float64(3.14)},
{Token{Type: NULL, Text: `null`}, nil},
{Token{Type: NUMBER, Text: `42`}, int64(42)},
{Token{Type: STRING, Text: `"foo"`}, "foo"},
{Token{Type: STRING, Text: `"foo\nbar"`}, "foo\nbar"},
}
for _, token := range tokens {
if val := token.tt.Value(); !reflect.DeepEqual(val, token.v) {
t.Errorf("want: %v got:%v\n", token.v, val)
}
}
}