json: scanner and token

2015-11-07 18:14:32 -08:00 · 2015-11-07 18:14:32 -08:00 · 87a91d1019
commit 87a91d1019
parent 338eebdbf7
5 changed files with 1102 additions and 0 deletions
--- a/json/scanner/scanner.go
+++ b/json/scanner/scanner.go
@ -0,0 +1,449 @@
+package scanner
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/hashicorp/hcl/json/token"
+)
+
+// eof represents a marker rune for the end of the reader.
+const eof = rune(0)
+
+// Scanner is a lexical scanner that splits a source byte slice into tokens
+// (see Scan).
+type Scanner struct {
+	buf *bytes.Buffer // Source buffer for advancing and scanning
+	src []byte        // Source buffer for immutable access
+
+	// Source Position
+	srcPos token.Pos // current position
+	// prevPos is the position saved by next() before it advances; unread()
+	// restores srcPos from it.
+	prevPos token.Pos
+
+	lastCharLen int // length of last character in bytes
+	lastLineLen int // length of last line in characters (for correct column reporting)
+
+	tokStart int // token text start position
+	tokEnd   int // token text end  position
+
+	// Error is called for each error encountered. If no Error
+	// function is set, the error is reported to os.Stderr.
+	Error func(pos token.Pos, msg string)
+
+	// ErrorCount is incremented by one for each error encountered.
+	ErrorCount int
+
+	// tokPos is the start position of the most recently scanned token; it is
+	// set by Scan. The Filename field is always left untouched by the Scanner.
+	tokPos token.Pos
+}
+
+// New returns a Scanner that tokenizes src. The scanner keeps src itself for
+// slicing out token text and wraps it in a bytes.Buffer for rune-by-rune
+// reading.
+func New(src []byte) *Scanner {
+	// Reading goes through an io.Reader compatible type (*bytes.Buffer) even
+	// though the whole source is already in memory, so a switch to streaming
+	// reads stays easy in the future.
+	s := &Scanner{
+		src: src,
+		buf: bytes.NewBuffer(src),
+	}
+	s.srcPos.Line = 1 // line numbers start at 1
+	return s
+}
+
+// next reads the next rune from the buffered reader and advances the source
+// position past it. It returns eof (rune(0)) when the read fails, including
+// at io.EOF. An invalid UTF-8 byte is reported through err and returned as
+// utf8.RuneError.
+func (s *Scanner) next() rune {
+	r, width, readErr := s.buf.ReadRune()
+	failed := readErr != nil
+	invalid := !failed && r == utf8.RuneError && width == 1
+
+	if failed || invalid {
+		// advance for error reporting; prevPos is left as it was
+		s.srcPos.Column++
+		s.srcPos.Offset += width
+		s.lastCharLen = width
+		if failed {
+			return eof
+		}
+		s.err("illegal UTF-8 encoding")
+		return r
+	}
+
+	// remember where we were so unread() can step back
+	s.prevPos = s.srcPos
+
+	s.srcPos.Offset += width
+	s.srcPos.Column++
+	s.lastCharLen = width
+
+	if r == '\n' {
+		// keep the finished line's length for column reporting, then start
+		// the next line at column 0
+		s.lastLineLen = s.srcPos.Column
+		s.srcPos.Line++
+		s.srcPos.Column = 0
+	}
+	return r
+}
+
+// unread pushes the most recently read rune back into the buffer and restores
+// the source position saved by next. It panics when the buffer refuses the
+// unread.
+func (s *Scanner) unread() {
+	err := s.buf.UnreadRune()
+	if err != nil {
+		panic(err) // this is user fault, we should catch it
+	}
+
+	// put back last position
+	s.srcPos = s.prevPos
+}
+
+// peek reports the rune that the next call to next would read, without moving
+// the source position. It returns eof when nothing can be read.
+func (s *Scanner) peek() rune {
+	r, _, err := s.buf.ReadRune()
+	if err == nil {
+		// hand the rune straight back to the buffer
+		s.buf.UnreadRune()
+		return r
+	}
+	return eof
+}
+
+// Scan skips leading whitespace, scans the next token and returns it together
+// with its start position and source text. At the end of the source it
+// returns a token of type token.EOF. Input that does not form a token is
+// reported through err and leaves the token type at its zero value.
+func (s *Scanner) Scan() token.Token {
+	ch := s.next()
+	for isWhitespace(ch) {
+		ch = s.next()
+	}
+
+	// next() has already moved past the first rune of the token, so step back
+	// by that rune's size to get the offset where the token starts.
+	start := s.srcPos.Offset - s.lastCharLen
+	s.tokStart = start
+	s.tokPos.Offset = start
+
+	if s.srcPos.Column <= 0 {
+		// last character was a '\n'
+		// (we cannot be at the beginning of the source
+		// since we have called next() at least once)
+		s.tokPos.Line = s.srcPos.Line - 1
+		s.tokPos.Column = s.lastLineLen
+	} else {
+		// common case: last character was not a '\n'
+		s.tokPos.Line = s.srcPos.Line
+		s.tokPos.Column = s.srcPos.Column
+	}
+
+	var typ token.Type
+	switch {
+	case isLetter(ch):
+		// only the keywords true, false and null are legal identifiers
+		switch s.scanIdentifier() {
+		case "true", "false":
+			typ = token.BOOL
+		case "null":
+			typ = token.NULL
+		default:
+			s.err("illegal char")
+		}
+	case isDecimal(ch):
+		typ = s.scanNumber(ch)
+	case ch == eof:
+		typ = token.EOF
+	case ch == '"':
+		typ = token.STRING
+		s.scanString()
+	case ch == '.':
+		// a lone '.' is PERIOD; followed by a digit it starts a FLOAT
+		typ = token.PERIOD
+		if digit := s.peek(); isDecimal(digit) {
+			typ = token.FLOAT
+			s.scanExponent(s.scanMantissa(digit))
+		}
+	case ch == '[':
+		typ = token.LBRACK
+	case ch == ']':
+		typ = token.RBRACK
+	case ch == '{':
+		typ = token.LBRACE
+	case ch == '}':
+		typ = token.RBRACE
+	case ch == ',':
+		typ = token.COMMA
+	case ch == '-':
+		// a minus sign is only accepted directly in front of a digit
+		if isDecimal(s.peek()) {
+			typ = s.scanNumber(s.next())
+		} else {
+			s.err("illegal char")
+		}
+	default:
+		s.err("illegal char")
+	}
+
+	// finish token ending
+	s.tokEnd = s.srcPos.Offset
+
+	// create token literal
+	var text string
+	if s.tokStart >= 0 {
+		text = string(s.src[s.tokStart:s.tokEnd])
+	}
+	s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
+
+	return token.Token{Type: typ, Pos: s.tokPos, Text: text}
+}
+
+// scanNumber scans a number whose first digit ch has already been consumed.
+// It returns token.FLOAT when the digits are followed by a fraction or an
+// exponent and token.NUMBER otherwise. A multi-digit integer starting with
+// '0' is reported as an error.
+func (s *Scanner) scanNumber(ch rune) token.Type {
+	leadingZero := ch == '0'
+	startPos := s.srcPos
+
+	s.scanMantissa(ch)
+
+	// look at the rune right after the integer digits
+	switch after := s.next(); after {
+	case 'e', 'E':
+		s.scanExponent(after)
+		return token.FLOAT
+	case '.':
+		if r := s.scanFraction(after); r == 'e' || r == 'E' {
+			// consume the exponent marker itself, then the exponent digits
+			s.scanExponent(s.next())
+		}
+		return token.FLOAT
+	case eof:
+		// nothing to put back
+	default:
+		s.unread()
+	}
+
+	// If the position moved, more than one digit was read: a larger number
+	// that starts with zero is an error.
+	if leadingZero && startPos != s.srcPos {
+		s.err("numbers cannot start with 0")
+	}
+
+	return token.NUMBER
+}
+
+// scanMantissa consumes a run of decimal digits beginning with ch and returns
+// the first non-decimal rune, which callers use to decide whether a fraction
+// or an exponent follows. That rune is put back unless it is eof. When ch is
+// not a decimal digit nothing is read and ch is returned as is.
+func (s *Scanner) scanMantissa(ch rune) rune {
+	if !isDecimal(ch) {
+		return ch
+	}
+
+	for isDecimal(ch) {
+		ch = s.next()
+	}
+	if ch != eof {
+		s.unread()
+	}
+	return ch
+}
+
+// scanFraction scans the digits after the '.' rune and returns the rune that
+// follows them. Any ch other than '.' is returned unchanged.
+func (s *Scanner) scanFraction(ch rune) rune {
+	if ch != '.' {
+		return ch
+	}
+	// peek only inspects what follows the dot; scanMantissa does the reading
+	return s.scanMantissa(s.peek())
+}
+
+// scanExponent scans what follows an 'e' or 'E' rune: an optional sign and
+// the exponent digits. It returns the rune after the digits. Any other ch is
+// returned unchanged.
+func (s *Scanner) scanExponent(ch rune) rune {
+	if ch != 'e' && ch != 'E' {
+		return ch
+	}
+
+	ch = s.next()
+	switch ch {
+	case '-', '+':
+		// skip the sign
+		ch = s.next()
+	}
+	return s.scanMantissa(ch)
+}
+
+// scanString scans a quoted string whose opening '"' has already been
+// consumed. Quotes inside a ${ ... } section do not end the string. A newline
+// or the end of input before the closing quote is reported as an error.
+func (s *Scanner) scanString() {
+	depth := 0 // nesting level of braces inside ${ ... }
+	for {
+		ch := s.next()
+
+		if ch == '\n' || ch < 0 || ch == eof {
+			s.err("literal not terminated")
+			return
+		}
+
+		// closing quote outside of any ${ ... }
+		if ch == '"' && depth == 0 {
+			return
+		}
+
+		switch {
+		case depth == 0 && ch == '$' && s.peek() == '{':
+			// entering ${ ... }: quotes are ignored until it is closed
+			depth++
+			s.next()
+		case depth > 0 && ch == '{':
+			depth++
+		}
+		if depth > 0 && ch == '}' {
+			depth--
+		}
+
+		if ch == '\\' {
+			s.scanEscape()
+		}
+	}
+}
+
+// scanEscape scans an escape sequence whose backslash has already been
+// consumed and returns the last rune it looked at. Unknown escapes are
+// reported through err.
+func (s *Scanner) scanEscape() rune {
+	// http://en.cppreference.com/w/cpp/language/escape
+	switch esc := s.next(); esc { // esc is the character after the backslash
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		// octal notation: up to three digits, esc being the first
+		return s.scanDigits(esc, 8, 3)
+	case 'x':
+		// hexadecimal notation: two digits
+		return s.scanDigits(s.next(), 16, 2)
+	case 'u':
+		// universal character name: four hex digits
+		return s.scanDigits(s.next(), 16, 4)
+	case 'U':
+		// universal character name: eight hex digits
+		return s.scanDigits(s.next(), 16, 8)
+	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
+		// single character escape, nothing more to read
+		return esc
+	default:
+		s.err("illegal char escape")
+		return esc
+	}
+}
+
+// scanDigits scans up to n digits of the given base, starting with ch, which
+// the caller has already read. For example the octal escape \123 results in
+// scanDigits('1', 8, 3). It reports an error when fewer than n digits are
+// found and returns the first rune that is not part of the digit run.
+func (s *Scanner) scanDigits(ch rune, base, n int) rune {
+	for n > 0 && digitVal(ch) < base {
+		ch = s.next()
+		n--
+	}
+	if n > 0 {
+		s.err("illegal char escape")
+	}
+
+	// Put the rune after the digits back for the caller. At the end of the
+	// input there is nothing to put back: the last read failed, so unread()
+	// would panic on a truncated escape such as `"\x`.
+	if ch != eof {
+		s.unread()
+	}
+	return ch
+}
+
+// scanIdentifier scans an identifier and returns the literal string
+func (s *Scanner) scanIdentifier() string {
+	offs := s.srcPos.Offset - s.lastCharLen
+	ch := s.next()
+	for isLetter(ch) || isDigit(ch) || ch == '-' {
+		ch = s.next()
+	}
+
+	if ch != eof {
+		s.unread() // we got identifier, put back latest char
+	}
+
+	return string(s.src[offs:s.srcPos.Offset])
+}
+
+// recentPosition returns the position used for error reporting: the offset
+// where the most recently read character starts, with line and column taken
+// from the current source position.
+func (s *Scanner) recentPosition() (pos token.Pos) {
+	pos.Offset = s.srcPos.Offset - s.lastCharLen
+
+	if s.srcPos.Column > 0 {
+		// common case: last character was not a '\n'
+		pos.Line, pos.Column = s.srcPos.Line, s.srcPos.Column
+		return pos
+	}
+	if s.lastLineLen > 0 {
+		// last character was a '\n'
+		// (we cannot be at the beginning of the source
+		// since we have called next() at least once)
+		pos.Line, pos.Column = s.srcPos.Line-1, s.lastLineLen
+		return pos
+	}
+
+	// at the beginning of the source
+	pos.Line, pos.Column = 1, 1
+	return pos
+}
+
+// err counts a scanning error and reports it, with the recent position, to
+// the s.Error function. When no function is set the error is written to
+// os.Stderr instead.
+func (s *Scanner) err(msg string) {
+	s.ErrorCount++
+	pos := s.recentPosition()
+
+	if s.Error == nil {
+		fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
+		return
+	}
+	s.Error(pos, msg)
+}
+
+// isLetter returns true if the given rune is an ASCII letter, an underscore
+// or a non-ASCII Unicode letter
+func isLetter(ch rune) bool {
+	switch {
+	case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z', ch == '_':
+		return true
+	case ch >= 0x80:
+		return unicode.IsLetter(ch)
+	}
+	return false
+}
+
+// isDigit returns true if the given rune is an ASCII decimal digit or a
+// non-ASCII Unicode digit
+func isDigit(ch rune) bool {
+	if ch < 0x80 {
+		return ch >= '0' && ch <= '9'
+	}
+	return unicode.IsDigit(ch)
+}
+
+// isDecimal returns true if the given rune is one of the ASCII digits 0-9
+func isDecimal(ch rune) bool {
+	return !(ch < '0' || ch > '9')
+}
+
+// isHexadecimal returns true if the given rune is a hexadecimal digit
+// (0-9, a-f or A-F)
+func isHexadecimal(ch rune) bool {
+	switch {
+	case ch >= '0' && ch <= '9':
+		return true
+	case ch >= 'a' && ch <= 'f':
+		return true
+	case ch >= 'A' && ch <= 'F':
+		return true
+	}
+	return false
+}
+
+// isWhitespace returns true if the rune is a space, tab, newline or carriage
+// return
+func isWhitespace(ch rune) bool {
+	switch ch {
+	case ' ', '\t', '\n', '\r':
+		return true
+	}
+	return false
+}
+
+// digitVal returns the integer value of a given octal, decimal or hexadecimal
+// rune. Any other rune yields 16, which is larger than any legal digit value.
+func digitVal(ch rune) int {
+	if ch >= '0' && ch <= '9' {
+		return int(ch - '0')
+	}
+	if ch >= 'a' && ch <= 'f' {
+		return int(ch-'a') + 10
+	}
+	if ch >= 'A' && ch <= 'F' {
+		return int(ch-'A') + 10
+	}
+	return 16
+}
--- a/json/scanner/scanner_test.go
+++ b/json/scanner/scanner_test.go
@ -0,0 +1,411 @@
+package scanner
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+
+	"github.com/hashicorp/hcl/json/token"
+)
+
+var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+
+// tokenPair couples a piece of source text with the token type the scanner
+// is expected to produce for it.
+type tokenPair struct {
+	tok  token.Type
+	text string
+}
+
+var tokenLists = map[string][]tokenPair{
+	"operator": []tokenPair{
+		{token.LBRACK, "["},
+		{token.LBRACE, "{"},
+		{token.COMMA, ","},
+		{token.PERIOD, "."},
+		{token.RBRACK, "]"},
+		{token.RBRACE, "}"},
+	},
+	"bool": []tokenPair{
+		{token.BOOL, "true"},
+		{token.BOOL, "false"},
+	},
+	"string": []tokenPair{
+		{token.STRING, `" "`},
+		{token.STRING, `"a"`},
+		{token.STRING, `"本"`},
+		{token.STRING, `"${file("foo")}"`},
+		{token.STRING, `"\a"`},
+		{token.STRING, `"\b"`},
+		{token.STRING, `"\f"`},
+		{token.STRING, `"\n"`},
+		{token.STRING, `"\r"`},
+		{token.STRING, `"\t"`},
+		{token.STRING, `"\v"`},
+		{token.STRING, `"\""`},
+		{token.STRING, `"\000"`},
+		{token.STRING, `"\777"`},
+		{token.STRING, `"\x00"`},
+		{token.STRING, `"\xff"`},
+		{token.STRING, `"\u0000"`},
+		{token.STRING, `"\ufA16"`},
+		{token.STRING, `"\U00000000"`},
+		{token.STRING, `"\U0000ffAB"`},
+		{token.STRING, `"` + f100 + `"`},
+	},
+	"number": []tokenPair{
+		{token.NUMBER, "0"},
+		{token.NUMBER, "1"},
+		{token.NUMBER, "9"},
+		{token.NUMBER, "42"},
+		{token.NUMBER, "1234567890"},
+		{token.NUMBER, "-0"},
+		{token.NUMBER, "-1"},
+		{token.NUMBER, "-9"},
+		{token.NUMBER, "-42"},
+		{token.NUMBER, "-1234567890"},
+	},
+	"float": []tokenPair{
+		{token.FLOAT, "0."},
+		{token.FLOAT, "1."},
+		{token.FLOAT, "42."},
+		{token.FLOAT, "01234567890."},
+		{token.FLOAT, ".0"},
+		{token.FLOAT, ".1"},
+		{token.FLOAT, ".42"},
+		{token.FLOAT, ".0123456789"},
+		{token.FLOAT, "0.0"},
+		{token.FLOAT, "1.0"},
+		{token.FLOAT, "42.0"},
+		{token.FLOAT, "01234567890.0"},
+		{token.FLOAT, "0e0"},
+		{token.FLOAT, "1e0"},
+		{token.FLOAT, "42e0"},
+		{token.FLOAT, "01234567890e0"},
+		{token.FLOAT, "0E0"},
+		{token.FLOAT, "1E0"},
+		{token.FLOAT, "42E0"},
+		{token.FLOAT, "01234567890E0"},
+		{token.FLOAT, "0e+10"},
+		{token.FLOAT, "1e-10"},
+		{token.FLOAT, "42e+10"},
+		{token.FLOAT, "01234567890e-10"},
+		{token.FLOAT, "0E+10"},
+		{token.FLOAT, "1E-10"},
+		{token.FLOAT, "42E+10"},
+		{token.FLOAT, "01234567890E-10"},
+		{token.FLOAT, "01.8e0"},
+		{token.FLOAT, "1.4e0"},
+		{token.FLOAT, "42.2e0"},
+		{token.FLOAT, "01234567890.12e0"},
+		{token.FLOAT, "0.E0"},
+		{token.FLOAT, "1.12E0"},
+		{token.FLOAT, "42.123E0"},
+		{token.FLOAT, "01234567890.213E0"},
+		{token.FLOAT, "0.2e+10"},
+		{token.FLOAT, "1.2e-10"},
+		{token.FLOAT, "42.54e+10"},
+		{token.FLOAT, "01234567890.98e-10"},
+		{token.FLOAT, "0.1E+10"},
+		{token.FLOAT, "1.1E-10"},
+		{token.FLOAT, "42.1E+10"},
+		{token.FLOAT, "01234567890.1E-10"},
+		{token.FLOAT, "-0.0"},
+		{token.FLOAT, "-1.0"},
+		{token.FLOAT, "-42.0"},
+		{token.FLOAT, "-01234567890.0"},
+		{token.FLOAT, "-0e0"},
+		{token.FLOAT, "-1e0"},
+		{token.FLOAT, "-42e0"},
+		{token.FLOAT, "-01234567890e0"},
+		{token.FLOAT, "-0E0"},
+		{token.FLOAT, "-1E0"},
+		{token.FLOAT, "-42E0"},
+		{token.FLOAT, "-01234567890E0"},
+		{token.FLOAT, "-0e+10"},
+		{token.FLOAT, "-1e-10"},
+		{token.FLOAT, "-42e+10"},
+		{token.FLOAT, "-01234567890e-10"},
+		{token.FLOAT, "-0E+10"},
+		{token.FLOAT, "-1E-10"},
+		{token.FLOAT, "-42E+10"},
+		{token.FLOAT, "-01234567890E-10"},
+		{token.FLOAT, "-01.8e0"},
+		{token.FLOAT, "-1.4e0"},
+		{token.FLOAT, "-42.2e0"},
+		{token.FLOAT, "-01234567890.12e0"},
+		{token.FLOAT, "-0.E0"},
+		{token.FLOAT, "-1.12E0"},
+		{token.FLOAT, "-42.123E0"},
+		{token.FLOAT, "-01234567890.213E0"},
+		{token.FLOAT, "-0.2e+10"},
+		{token.FLOAT, "-1.2e-10"},
+		{token.FLOAT, "-42.54e+10"},
+		{token.FLOAT, "-01234567890.98e-10"},
+		{token.FLOAT, "-0.1E+10"},
+		{token.FLOAT, "-1.1E-10"},
+		{token.FLOAT, "-42.1E+10"},
+		{token.FLOAT, "-01234567890.1E-10"},
+	},
+}
+
+// orderedTokenLists fixes the order in which TestPosition walks tokenLists,
+// since map iteration order is not stable. "comment" has no entry in
+// tokenLists, so it contributes no tokens.
+var orderedTokenLists = []string{
+	"comment",
+	"operator",
+	"bool",
+	"string",
+	"number",
+	"float",
+}
+
+func TestPosition(t *testing.T) {
+	// create artifical source code
+	buf := new(bytes.Buffer)
+
+	for _, listName := range orderedTokenLists {
+		for _, ident := range tokenLists[listName] {
+			fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text)
+		}
+	}
+
+	s := New(buf.Bytes())
+
+	pos := token.Pos{"", 4, 1, 5}
+	s.Scan()
+	for _, listName := range orderedTokenLists {
+
+		for _, k := range tokenLists[listName] {
+			curPos := s.tokPos
+			// fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column)
+
+			if curPos.Offset != pos.Offset {
+				t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text)
+			}
+			if curPos.Line != pos.Line {
+				t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text)
+			}
+			if curPos.Column != pos.Column {
+				t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text)
+			}
+			pos.Offset += 4 + len(k.text) + 1     // 4 tabs + token bytes + newline
+			pos.Line += countNewlines(k.text) + 1 // each token is on a new line
+
+			s.Error = func(pos token.Pos, msg string) {
+				t.Errorf("error %q for %q", msg, k.text)
+			}
+
+			s.Scan()
+		}
+	}
+	// make sure there were no token-internal errors reported by scanner
+	if s.ErrorCount != 0 {
+		t.Errorf("%d errors", s.ErrorCount)
+	}
+}
+
+// TestComment looks up the "comment" list, which tokenLists does not define,
+// so testTokenList receives a nil slice and checks nothing.
+func TestComment(t *testing.T) {
+	testTokenList(t, tokenLists["comment"])
+}
+
+// TestOperator scans the bracket, brace, comma and period tokens.
+func TestOperator(t *testing.T) {
+	testTokenList(t, tokenLists["operator"])
+}
+
+// TestBool scans the literals true and false.
+func TestBool(t *testing.T) {
+	testTokenList(t, tokenLists["bool"])
+}
+
+// TestIdent looks up the "ident" list, which tokenLists does not define, so
+// testTokenList receives a nil slice and checks nothing.
+func TestIdent(t *testing.T) {
+	testTokenList(t, tokenLists["ident"])
+}
+
+// TestString scans quoted strings, including escapes and ${...} sections.
+func TestString(t *testing.T) {
+	testTokenList(t, tokenLists["string"])
+}
+
+// TestNumber scans positive and negative integers.
+func TestNumber(t *testing.T) {
+	testTokenList(t, tokenLists["number"])
+}
+
+// TestFloat scans numbers with a fraction and/or an exponent.
+func TestFloat(t *testing.T) {
+	testTokenList(t, tokenLists["float"])
+}
+
+/*
+func TestRealExample(t *testing.T) {
+	complexHCL := `// This comes from Terraform, as a test
+	variable "foo" {
+	    default = "bar"
+	    description = "bar"
+	}
+
+	provider "aws" {
+	  access_key = "foo"
+	  secret_key = "bar"
+	}
+
+	resource "aws_security_group" "firewall" {
+	    count = 5
+	}
+
+	resource aws_instance "web" {
+	    ami = "${var.foo}"
+	    security_groups = [
+	        "foo",
+	        "${aws_security_group.firewall.foo}"
+	    ]
+
+	    network_interface {
+	        device_index = 0
+	        description = "Main network interface"
+	    }
+	}`
+
+	literals := []struct {
+		tokenType token.Type
+		literal   string
+	}{
+		{token.COMMENT, `// This comes from Terraform, as a test`},
+		{token.IDENT, `variable`},
+		{token.STRING, `"foo"`},
+		{token.LBRACE, `{`},
+		{token.IDENT, `default`},
+		{token.ASSIGN, `=`},
+		{token.STRING, `"bar"`},
+		{token.IDENT, `description`},
+		{token.ASSIGN, `=`},
+		{token.STRING, `"bar"`},
+		{token.RBRACE, `}`},
+		{token.IDENT, `provider`},
+		{token.STRING, `"aws"`},
+		{token.LBRACE, `{`},
+		{token.IDENT, `access_key`},
+		{token.ASSIGN, `=`},
+		{token.STRING, `"foo"`},
+		{token.IDENT, `secret_key`},
+		{token.ASSIGN, `=`},
+		{token.STRING, `"bar"`},
+		{token.RBRACE, `}`},
+		{token.IDENT, `resource`},
+		{token.STRING, `"aws_security_group"`},
+		{token.STRING, `"firewall"`},
+		{token.LBRACE, `{`},
+		{token.IDENT, `count`},
+		{token.ASSIGN, `=`},
+		{token.NUMBER, `5`},
+		{token.RBRACE, `}`},
+		{token.IDENT, `resource`},
+		{token.IDENT, `aws_instance`},
+		{token.STRING, `"web"`},
+		{token.LBRACE, `{`},
+		{token.IDENT, `ami`},
+		{token.ASSIGN, `=`},
+		{token.STRING, `"${var.foo}"`},
+		{token.IDENT, `security_groups`},
+		{token.ASSIGN, `=`},
+		{token.LBRACK, `[`},
+		{token.STRING, `"foo"`},
+		{token.COMMA, `,`},
+		{token.STRING, `"${aws_security_group.firewall.foo}"`},
+		{token.RBRACK, `]`},
+		{token.IDENT, `network_interface`},
+		{token.LBRACE, `{`},
+		{token.IDENT, `device_index`},
+		{token.ASSIGN, `=`},
+		{token.NUMBER, `0`},
+		{token.IDENT, `description`},
+		{token.ASSIGN, `=`},
+		{token.STRING, `"Main network interface"`},
+		{token.RBRACE, `}`},
+		{token.RBRACE, `}`},
+		{token.EOF, ``},
+	}
+
+	s := New([]byte(complexHCL))
+	for _, l := range literals {
+		tok := s.Scan()
+		if l.tokenType != tok.Type {
+			t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String())
+		}
+
+		if l.literal != tok.Text {
+			t.Errorf("got: %s want %s\n", tok, l.literal)
+		}
+	}
+
+}
+*/
+
+// TestError feeds malformed sources to the scanner and checks the position
+// and message of the first reported error as well as the returned token type.
+func TestError(t *testing.T) {
+	cases := []struct {
+		src, pos, msg string
+		tok           token.Type
+	}{
+		{"\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL},
+		{"\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL},
+
+		{`"ab` + "\x80", "1:4", "illegal UTF-8 encoding", token.STRING},
+		{`"abc` + "\xff", "1:5", "illegal UTF-8 encoding", token.STRING},
+
+		{`01238`, "1:7", "numbers cannot start with 0", token.NUMBER},
+		{`01238123`, "1:10", "numbers cannot start with 0", token.NUMBER},
+		{`'aa'`, "1:1", "illegal char", token.ILLEGAL},
+
+		{`"`, "1:2", "literal not terminated", token.STRING},
+		{`"abc`, "1:5", "literal not terminated", token.STRING},
+		{`"abc` + "\n", "1:5", "literal not terminated", token.STRING},
+	}
+
+	for _, c := range cases {
+		testError(t, c.src, c.pos, c.msg, c.tok)
+	}
+}
+
+// testError scans one token from src and verifies that the scanner reports
+// an error, that the first reported error has position pos and message msg,
+// and that the returned token has type tok.
+func testError(t *testing.T, src, pos, msg string, tok token.Type) {
+	s := New([]byte(src))
+
+	called := false
+	s.Error = func(p token.Pos, m string) {
+		if called {
+			return // only the first reported error is checked
+		}
+		called = true
+
+		if got := p.String(); got != pos {
+			t.Errorf("pos = %q, want %q for %q", p, pos, src)
+		}
+		if m != msg {
+			t.Errorf("msg = %q, want %q for %q", m, msg, src)
+		}
+	}
+
+	if tk := s.Scan(); tk.Type != tok {
+		t.Errorf("tok = %s, want %s for %q", tk, tok, src)
+	}
+	if !called {
+		t.Errorf("error handler not called for %q", src)
+	}
+	if s.ErrorCount == 0 {
+		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
+	}
+}
+
+// testTokenList writes each text of tokenList on its own line, scans the
+// result and checks that every scanned token has the expected type and that
+// its text equals the source text.
+func testTokenList(t *testing.T, tokenList []tokenPair) {
+	// create artificial source code
+	buf := new(bytes.Buffer)
+	for _, ident := range tokenList {
+		fmt.Fprintf(buf, "%s\n", ident.text)
+	}
+
+	s := New(buf.Bytes())
+	for _, ident := range tokenList {
+		tok := s.Scan()
+
+		// report exactly the values that were compared, so a failure message
+		// shows the mismatching type or text rather than the whole token
+		if tok.Type != ident.tok {
+			t.Errorf("tok = %q want %q for %q\n", tok.Type, ident.tok, ident.text)
+		}
+
+		if tok.Text != ident.text {
+			t.Errorf("text = %q want %q", tok.Text, ident.text)
+		}
+	}
+}
+
+// countNewlines returns the number of '\n' characters in s.
+func countNewlines(s string) int {
+	count := 0
+	// '\n' is a single byte that never occurs inside a multi-byte UTF-8
+	// sequence, so walking the bytes finds the same newlines as walking runes
+	for i := 0; i < len(s); i++ {
+		if s[i] == '\n' {
+			count++
+		}
+	}
+	return count
+}
--- a/json/token/position.go
+++ b/json/token/position.go
@ -0,0 +1,46 @@
+package token
+
+import "fmt"
+
+// Pos describes an arbitrary source position
+// including the file, line, and column location.
+// A Pos is valid if the line number is > 0 (see IsValid).
+type Pos struct {
+	Filename string // filename, if any
+	Offset   int    // offset, starting at 0
+	Line     int    // line number, starting at 1
+	Column   int    // column number, starting at 1 (character count)
+}
+
+// IsValid returns true if the position is valid, i.e. its line number is > 0.
+// It uses a value receiver like the other Pos methods, so it can also be
+// called on Pos values that are not addressable.
+func (p Pos) IsValid() bool { return p.Line > 0 }
+
+// String formats the position in one of these forms:
+//
+//	file:line:column    valid position with file name
+//	line:column         valid position without file name
+//	file                invalid position with file name
+//	-                   invalid position without file name
+func (p Pos) String() string {
+	if !p.IsValid() {
+		if p.Filename == "" {
+			return "-"
+		}
+		return p.Filename
+	}
+
+	lineCol := fmt.Sprintf("%d:%d", p.Line, p.Column)
+	if p.Filename == "" {
+		return lineCol
+	}
+	return p.Filename + ":" + lineCol
+}
+
+// Before reports whether the position p is before u, that is whether p has a
+// smaller offset or a smaller line number than u.
+func (p Pos) Before(u Pos) bool {
+	if p.Offset < u.Offset {
+		return true
+	}
+	return p.Line < u.Line
+}
+
+// After reports whether the position p is after u, that is whether p has a
+// larger offset or a larger line number than u.
+func (p Pos) After(u Pos) bool {
+	if p.Offset > u.Offset {
+		return true
+	}
+	return p.Line > u.Line
+}
--- a/json/token/token.go
+++ b/json/token/token.go
@ -0,0 +1,139 @@
+package token
+
+import (
+	"fmt"
+	"strconv"
+
+	hclstrconv "github.com/hashicorp/hcl/hcl/strconv"
+)
+
+// Token defines a single HCL token which can be obtained via the Scanner
+type Token struct {
+	Type Type   // kind of token
+	Pos  Pos    // position of the token in the source
+	Text string // text of the token as it appears in the source
+}
+
+// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
+type Type int
+
+const (
+	// Special tokens
+	ILLEGAL Type = iota
+	EOF
+
+	identifier_beg
+	literal_beg
+	NUMBER // 12345
+	FLOAT  // 123.45
+	BOOL   // true,false
+	STRING // "abc"
+	NULL   // null
+	literal_end
+	identifier_end
+
+	operator_beg
+	LBRACK // [
+	LBRACE // {
+	COMMA  // ,
+	PERIOD // .
+
+	RBRACK // ]
+	RBRACE // }
+
+	operator_end
+)
+
+var tokens = [...]string{
+	ILLEGAL: "ILLEGAL",
+
+	EOF: "EOF",
+
+	NUMBER: "NUMBER",
+	FLOAT:  "FLOAT",
+	BOOL:   "BOOL",
+	STRING: "STRING",
+	NULL:   "NULL",
+
+	LBRACK: "LBRACK",
+	LBRACE: "LBRACE",
+	COMMA:  "COMMA",
+	PERIOD: "PERIOD",
+
+	RBRACK: "RBRACK",
+	RBRACE: "RBRACE",
+}
+
+// String returns the name of the token type t as listed in tokens. Types
+// without a name are rendered as "token(N)", N being the numeric value.
+func (t Type) String() string {
+	if t >= 0 && int(t) < len(tokens) {
+		if name := tokens[t]; name != "" {
+			return name
+		}
+	}
+	return "token(" + strconv.Itoa(int(t)) + ")"
+}
+
+// IsIdentifier reports whether t lies strictly between the identifier_beg and
+// identifier_end markers, the range holding identifiers and basic type
+// literals.
+func (t Type) IsIdentifier() bool {
+	return t > identifier_beg && t < identifier_end
+}
+
+// IsLiteral reports whether t lies strictly between the literal_beg and
+// literal_end markers, the range holding the basic type literals.
+func (t Type) IsLiteral() bool {
+	return t > literal_beg && t < literal_end
+}
+
+// IsOperator reports whether t lies strictly between the operator_beg and
+// operator_end markers, the range holding operators and delimiters.
+func (t Type) IsOperator() bool {
+	return t > operator_beg && t < operator_end
+}
+
+// String returns a description of the token made of its position, its type
+// and its text, separated by single spaces.
+func (t Token) String() string {
+	return t.Pos.String() + " " + t.Type.String() + " " + t.Text
+}
+
+// Value returns the properly typed value for this token. The type of the
+// returned interface{} follows from the Type field: bool for BOOL, float64
+// for FLOAT, int64 for NUMBER, string for STRING and nil for NULL.
+//
+// This can only be called for literal types. If it is called for any other
+// type, or if the text cannot be converted, this will panic.
+func (t Token) Value() interface{} {
+	switch t.Type {
+	case NULL:
+		return nil
+	case BOOL:
+		switch t.Text {
+		case "true":
+			return true
+		case "false":
+			return false
+		}
+		panic("unknown bool value: " + t.Text)
+	case NUMBER:
+		// base 0 lets strconv pick the base from the text's prefix
+		n, err := strconv.ParseInt(t.Text, 0, 64)
+		if err != nil {
+			panic(err)
+		}
+		return n
+	case FLOAT:
+		f, err := strconv.ParseFloat(t.Text, 64)
+		if err != nil {
+			panic(err)
+		}
+		return f
+	case STRING:
+		str, err := hclstrconv.Unquote(t.Text)
+		if err != nil {
+			panic(fmt.Sprintf("unquote %s err: %s", t.Text, err))
+		}
+		return str
+	}
+
+	panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
+}
--- a/json/token/token_test.go
+++ b/json/token/token_test.go
@ -0,0 +1,57 @@
+package token
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestTypeString(t *testing.T) {
+	var tokens = []struct {
+		tt  Type
+		str string
+	}{
+		{ILLEGAL, "ILLEGAL"},
+		{EOF, "EOF"},
+		{NUMBER, "NUMBER"},
+		{FLOAT, "FLOAT"},
+		{BOOL, "BOOL"},
+		{STRING, "STRING"},
+		{NULL, "NULL"},
+		{LBRACK, "LBRACK"},
+		{LBRACE, "LBRACE"},
+		{COMMA, "COMMA"},
+		{PERIOD, "PERIOD"},
+		{RBRACK, "RBRACK"},
+		{RBRACE, "RBRACE"},
+	}
+
+	for _, token := range tokens {
+		if token.tt.String() != token.str {
+			t.Errorf("want: %q got:%q\n", token.str, token.tt)
+
+		}
+	}
+
+}
+
+func TestTokenValue(t *testing.T) {
+	var tokens = []struct {
+		tt Token
+		v  interface{}
+	}{
+		{Token{Type: BOOL, Text: `true`}, true},
+		{Token{Type: BOOL, Text: `false`}, false},
+		{Token{Type: FLOAT, Text: `3.14`}, float64(3.14)},
+		{Token{Type: NULL, Text: `null`}, nil},
+		{Token{Type: NUMBER, Text: `42`}, int64(42)},
+		{Token{Type: STRING, Text: `"foo"`}, "foo"},
+		{Token{Type: STRING, Text: `"foo\nbar"`}, "foo\nbar"},
+	}
+
+	for _, token := range tokens {
+		if val := token.tt.Value(); !reflect.DeepEqual(val, token.v) {
+			t.Errorf("want: %v got:%v\n", token.v, val)
+		}
+	}
+
+}