json: scanner and token
This commit is contained in:
parent
338eebdbf7
commit
87a91d1019
449
json/scanner/scanner.go
Normal file
449
json/scanner/scanner.go
Normal file
@ -0,0 +1,449 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/hashicorp/hcl/json/token"
|
||||
)
|
||||
|
||||
// eof represents a marker rune for the end of the reader.
|
||||
const eof = rune(0)
|
||||
|
||||
// Scanner defines a lexical scanner
|
||||
type Scanner struct {
|
||||
buf *bytes.Buffer // Source buffer for advancing and scanning
|
||||
src []byte // Source buffer for immutable access
|
||||
|
||||
// Source Position
|
||||
srcPos token.Pos // current position
|
||||
prevPos token.Pos // previous position, used for peek() method
|
||||
|
||||
lastCharLen int // length of last character in bytes
|
||||
lastLineLen int // length of last line in characters (for correct column reporting)
|
||||
|
||||
tokStart int // token text start position
|
||||
tokEnd int // token text end position
|
||||
|
||||
// Error is called for each error encountered. If no Error
|
||||
// function is set, the error is reported to os.Stderr.
|
||||
Error func(pos token.Pos, msg string)
|
||||
|
||||
// ErrorCount is incremented by one for each error encountered.
|
||||
ErrorCount int
|
||||
|
||||
// tokPos is the start position of most recently scanned token; set by
|
||||
// Scan. The Filename field is always left untouched by the Scanner. If
|
||||
// an error is reported (via Error) and Position is invalid, the scanner is
|
||||
// not inside a token.
|
||||
tokPos token.Pos
|
||||
}
|
||||
|
||||
// New creates and initializes a new instance of Scanner using src as
|
||||
// its source content.
|
||||
func New(src []byte) *Scanner {
|
||||
// even though we accept a src, we read from a io.Reader compatible type
|
||||
// (*bytes.Buffer). So in the future we might easily change it to streaming
|
||||
// read.
|
||||
b := bytes.NewBuffer(src)
|
||||
s := &Scanner{
|
||||
buf: b,
|
||||
src: src,
|
||||
}
|
||||
|
||||
// srcPosition always starts with 1
|
||||
s.srcPos.Line = 1
|
||||
return s
|
||||
}
|
||||
|
||||
// next reads the next rune from the bufferred reader. Returns the rune(0) if
|
||||
// an error occurs (or io.EOF is returned).
|
||||
func (s *Scanner) next() rune {
|
||||
ch, size, err := s.buf.ReadRune()
|
||||
if err != nil {
|
||||
// advance for error reporting
|
||||
s.srcPos.Column++
|
||||
s.srcPos.Offset += size
|
||||
s.lastCharLen = size
|
||||
return eof
|
||||
}
|
||||
|
||||
if ch == utf8.RuneError && size == 1 {
|
||||
s.srcPos.Column++
|
||||
s.srcPos.Offset += size
|
||||
s.lastCharLen = size
|
||||
s.err("illegal UTF-8 encoding")
|
||||
return ch
|
||||
}
|
||||
|
||||
// remember last position
|
||||
s.prevPos = s.srcPos
|
||||
|
||||
s.srcPos.Column++
|
||||
s.lastCharLen = size
|
||||
s.srcPos.Offset += size
|
||||
|
||||
if ch == '\n' {
|
||||
s.srcPos.Line++
|
||||
s.lastLineLen = s.srcPos.Column
|
||||
s.srcPos.Column = 0
|
||||
}
|
||||
|
||||
// debug
|
||||
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
|
||||
return ch
|
||||
}
|
||||
|
||||
// unread unreads the previous read Rune and updates the source position
|
||||
func (s *Scanner) unread() {
|
||||
if err := s.buf.UnreadRune(); err != nil {
|
||||
panic(err) // this is user fault, we should catch it
|
||||
}
|
||||
s.srcPos = s.prevPos // put back last position
|
||||
}
|
||||
|
||||
// peek returns the next rune without advancing the reader.
|
||||
func (s *Scanner) peek() rune {
|
||||
peek, _, err := s.buf.ReadRune()
|
||||
if err != nil {
|
||||
return eof
|
||||
}
|
||||
|
||||
s.buf.UnreadRune()
|
||||
return peek
|
||||
}
|
||||
|
||||
// Scan scans the next token and returns the token.
|
||||
func (s *Scanner) Scan() token.Token {
|
||||
ch := s.next()
|
||||
|
||||
// skip white space
|
||||
for isWhitespace(ch) {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
var tok token.Type
|
||||
|
||||
// token text markings
|
||||
s.tokStart = s.srcPos.Offset - s.lastCharLen
|
||||
|
||||
// token position, initial next() is moving the offset by one(size of rune
|
||||
// actually), though we are interested with the starting point
|
||||
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
|
||||
if s.srcPos.Column > 0 {
|
||||
// common case: last character was not a '\n'
|
||||
s.tokPos.Line = s.srcPos.Line
|
||||
s.tokPos.Column = s.srcPos.Column
|
||||
} else {
|
||||
// last character was a '\n'
|
||||
// (we cannot be at the beginning of the source
|
||||
// since we have called next() at least once)
|
||||
s.tokPos.Line = s.srcPos.Line - 1
|
||||
s.tokPos.Column = s.lastLineLen
|
||||
}
|
||||
|
||||
switch {
|
||||
case isLetter(ch):
|
||||
lit := s.scanIdentifier()
|
||||
if lit == "true" || lit == "false" {
|
||||
tok = token.BOOL
|
||||
} else if lit == "null" {
|
||||
tok = token.NULL
|
||||
} else {
|
||||
s.err("illegal char")
|
||||
}
|
||||
case isDecimal(ch):
|
||||
tok = s.scanNumber(ch)
|
||||
default:
|
||||
switch ch {
|
||||
case eof:
|
||||
tok = token.EOF
|
||||
case '"':
|
||||
tok = token.STRING
|
||||
s.scanString()
|
||||
case '.':
|
||||
tok = token.PERIOD
|
||||
ch = s.peek()
|
||||
if isDecimal(ch) {
|
||||
tok = token.FLOAT
|
||||
ch = s.scanMantissa(ch)
|
||||
ch = s.scanExponent(ch)
|
||||
}
|
||||
case '[':
|
||||
tok = token.LBRACK
|
||||
case ']':
|
||||
tok = token.RBRACK
|
||||
case '{':
|
||||
tok = token.LBRACE
|
||||
case '}':
|
||||
tok = token.RBRACE
|
||||
case ',':
|
||||
tok = token.COMMA
|
||||
case '-':
|
||||
if isDecimal(s.peek()) {
|
||||
ch := s.next()
|
||||
tok = s.scanNumber(ch)
|
||||
} else {
|
||||
s.err("illegal char")
|
||||
}
|
||||
default:
|
||||
s.err("illegal char")
|
||||
}
|
||||
}
|
||||
|
||||
// finish token ending
|
||||
s.tokEnd = s.srcPos.Offset
|
||||
|
||||
// create token literal
|
||||
var tokenText string
|
||||
if s.tokStart >= 0 {
|
||||
tokenText = string(s.src[s.tokStart:s.tokEnd])
|
||||
}
|
||||
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
|
||||
|
||||
return token.Token{
|
||||
Type: tok,
|
||||
Pos: s.tokPos,
|
||||
Text: tokenText,
|
||||
}
|
||||
}
|
||||
|
||||
// scanNumber scans a HCL number definition starting with the given rune
|
||||
func (s *Scanner) scanNumber(ch rune) token.Type {
|
||||
zero := ch == '0'
|
||||
pos := s.srcPos
|
||||
|
||||
s.scanMantissa(ch)
|
||||
ch = s.next() // seek forward
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.scanExponent(ch)
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if ch == '.' {
|
||||
ch = s.scanFraction(ch)
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.next()
|
||||
ch = s.scanExponent(ch)
|
||||
}
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
|
||||
// If we have a larger number and this is zero, error
|
||||
if zero && pos != s.srcPos {
|
||||
s.err("numbers cannot start with 0")
|
||||
}
|
||||
|
||||
return token.NUMBER
|
||||
}
|
||||
|
||||
// scanMantissa scans the mantissa begining from the rune. It returns the next
|
||||
// non decimal rune. It's used to determine wheter it's a fraction or exponent.
|
||||
func (s *Scanner) scanMantissa(ch rune) rune {
|
||||
scanned := false
|
||||
for isDecimal(ch) {
|
||||
ch = s.next()
|
||||
scanned = true
|
||||
}
|
||||
|
||||
if scanned && ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanFraction scans the fraction after the '.' rune
|
||||
func (s *Scanner) scanFraction(ch rune) rune {
|
||||
if ch == '.' {
|
||||
ch = s.peek() // we peek just to see if we can move forward
|
||||
ch = s.scanMantissa(ch)
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
|
||||
// rune.
|
||||
func (s *Scanner) scanExponent(ch rune) rune {
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.next()
|
||||
if ch == '-' || ch == '+' {
|
||||
ch = s.next()
|
||||
}
|
||||
ch = s.scanMantissa(ch)
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanString scans a quoted string
|
||||
func (s *Scanner) scanString() {
|
||||
braces := 0
|
||||
for {
|
||||
// '"' opening already consumed
|
||||
// read character after quote
|
||||
ch := s.next()
|
||||
|
||||
if ch == '\n' || ch < 0 || ch == eof {
|
||||
s.err("literal not terminated")
|
||||
return
|
||||
}
|
||||
|
||||
if ch == '"' && braces == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// If we're going into a ${} then we can ignore quotes for awhile
|
||||
if braces == 0 && ch == '$' && s.peek() == '{' {
|
||||
braces++
|
||||
s.next()
|
||||
} else if braces > 0 && ch == '{' {
|
||||
braces++
|
||||
}
|
||||
if braces > 0 && ch == '}' {
|
||||
braces--
|
||||
}
|
||||
|
||||
if ch == '\\' {
|
||||
s.scanEscape()
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// scanEscape scans an escape sequence
|
||||
func (s *Scanner) scanEscape() rune {
|
||||
// http://en.cppreference.com/w/cpp/language/escape
|
||||
ch := s.next() // read character after '/'
|
||||
switch ch {
|
||||
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
|
||||
// nothing to do
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
// octal notation
|
||||
ch = s.scanDigits(ch, 8, 3)
|
||||
case 'x':
|
||||
// hexademical notation
|
||||
ch = s.scanDigits(s.next(), 16, 2)
|
||||
case 'u':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 4)
|
||||
case 'U':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 8)
|
||||
default:
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanDigits scans a rune with the given base for n times. For example an
|
||||
// octal notation \184 would yield in scanDigits(ch, 8, 3)
|
||||
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
|
||||
for n > 0 && digitVal(ch) < base {
|
||||
ch = s.next()
|
||||
n--
|
||||
}
|
||||
if n > 0 {
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
|
||||
// we scanned all digits, put the last non digit char back
|
||||
s.unread()
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanIdentifier scans an identifier and returns the literal string
|
||||
func (s *Scanner) scanIdentifier() string {
|
||||
offs := s.srcPos.Offset - s.lastCharLen
|
||||
ch := s.next()
|
||||
for isLetter(ch) || isDigit(ch) || ch == '-' {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread() // we got identifier, put back latest char
|
||||
}
|
||||
|
||||
return string(s.src[offs:s.srcPos.Offset])
|
||||
}
|
||||
|
||||
// recentPosition returns the position of the character immediately after the
|
||||
// character or token returned by the last call to Scan.
|
||||
func (s *Scanner) recentPosition() (pos token.Pos) {
|
||||
pos.Offset = s.srcPos.Offset - s.lastCharLen
|
||||
switch {
|
||||
case s.srcPos.Column > 0:
|
||||
// common case: last character was not a '\n'
|
||||
pos.Line = s.srcPos.Line
|
||||
pos.Column = s.srcPos.Column
|
||||
case s.lastLineLen > 0:
|
||||
// last character was a '\n'
|
||||
// (we cannot be at the beginning of the source
|
||||
// since we have called next() at least once)
|
||||
pos.Line = s.srcPos.Line - 1
|
||||
pos.Column = s.lastLineLen
|
||||
default:
|
||||
// at the beginning of the source
|
||||
pos.Line = 1
|
||||
pos.Column = 1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// err prints the error of any scanning to s.Error function. If the function is
|
||||
// not defined, by default it prints them to os.Stderr
|
||||
func (s *Scanner) err(msg string) {
|
||||
s.ErrorCount++
|
||||
pos := s.recentPosition()
|
||||
|
||||
if s.Error != nil {
|
||||
s.Error(pos, msg)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
|
||||
}
|
||||
|
||||
// isHexadecimal returns true if the given rune is a letter
|
||||
func isLetter(ch rune) bool {
|
||||
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
|
||||
}
|
||||
|
||||
// isHexadecimal returns true if the given rune is a decimal digit
|
||||
func isDigit(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
|
||||
}
|
||||
|
||||
// isHexadecimal returns true if the given rune is a decimal number
|
||||
func isDecimal(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9'
|
||||
}
|
||||
|
||||
// isHexadecimal returns true if the given rune is an hexadecimal number
|
||||
func isHexadecimal(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
|
||||
}
|
||||
|
||||
// isWhitespace returns true if the rune is a space, tab, newline or carriage return
|
||||
func isWhitespace(ch rune) bool {
|
||||
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
||||
}
|
||||
|
||||
// digitVal returns the integer value of a given octal,decimal or hexadecimal rune
|
||||
func digitVal(ch rune) int {
|
||||
switch {
|
||||
case '0' <= ch && ch <= '9':
|
||||
return int(ch - '0')
|
||||
case 'a' <= ch && ch <= 'f':
|
||||
return int(ch - 'a' + 10)
|
||||
case 'A' <= ch && ch <= 'F':
|
||||
return int(ch - 'A' + 10)
|
||||
}
|
||||
return 16 // larger than any legal digit val
|
||||
}
|
411
json/scanner/scanner_test.go
Normal file
411
json/scanner/scanner_test.go
Normal file
@ -0,0 +1,411 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/hcl/json/token"
|
||||
)
|
||||
|
||||
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
|
||||
|
||||
type tokenPair struct {
|
||||
tok token.Type
|
||||
text string
|
||||
}
|
||||
|
||||
var tokenLists = map[string][]tokenPair{
|
||||
"operator": []tokenPair{
|
||||
{token.LBRACK, "["},
|
||||
{token.LBRACE, "{"},
|
||||
{token.COMMA, ","},
|
||||
{token.PERIOD, "."},
|
||||
{token.RBRACK, "]"},
|
||||
{token.RBRACE, "}"},
|
||||
},
|
||||
"bool": []tokenPair{
|
||||
{token.BOOL, "true"},
|
||||
{token.BOOL, "false"},
|
||||
},
|
||||
"string": []tokenPair{
|
||||
{token.STRING, `" "`},
|
||||
{token.STRING, `"a"`},
|
||||
{token.STRING, `"本"`},
|
||||
{token.STRING, `"${file("foo")}"`},
|
||||
{token.STRING, `"\a"`},
|
||||
{token.STRING, `"\b"`},
|
||||
{token.STRING, `"\f"`},
|
||||
{token.STRING, `"\n"`},
|
||||
{token.STRING, `"\r"`},
|
||||
{token.STRING, `"\t"`},
|
||||
{token.STRING, `"\v"`},
|
||||
{token.STRING, `"\""`},
|
||||
{token.STRING, `"\000"`},
|
||||
{token.STRING, `"\777"`},
|
||||
{token.STRING, `"\x00"`},
|
||||
{token.STRING, `"\xff"`},
|
||||
{token.STRING, `"\u0000"`},
|
||||
{token.STRING, `"\ufA16"`},
|
||||
{token.STRING, `"\U00000000"`},
|
||||
{token.STRING, `"\U0000ffAB"`},
|
||||
{token.STRING, `"` + f100 + `"`},
|
||||
},
|
||||
"number": []tokenPair{
|
||||
{token.NUMBER, "0"},
|
||||
{token.NUMBER, "1"},
|
||||
{token.NUMBER, "9"},
|
||||
{token.NUMBER, "42"},
|
||||
{token.NUMBER, "1234567890"},
|
||||
{token.NUMBER, "-0"},
|
||||
{token.NUMBER, "-1"},
|
||||
{token.NUMBER, "-9"},
|
||||
{token.NUMBER, "-42"},
|
||||
{token.NUMBER, "-1234567890"},
|
||||
},
|
||||
"float": []tokenPair{
|
||||
{token.FLOAT, "0."},
|
||||
{token.FLOAT, "1."},
|
||||
{token.FLOAT, "42."},
|
||||
{token.FLOAT, "01234567890."},
|
||||
{token.FLOAT, ".0"},
|
||||
{token.FLOAT, ".1"},
|
||||
{token.FLOAT, ".42"},
|
||||
{token.FLOAT, ".0123456789"},
|
||||
{token.FLOAT, "0.0"},
|
||||
{token.FLOAT, "1.0"},
|
||||
{token.FLOAT, "42.0"},
|
||||
{token.FLOAT, "01234567890.0"},
|
||||
{token.FLOAT, "0e0"},
|
||||
{token.FLOAT, "1e0"},
|
||||
{token.FLOAT, "42e0"},
|
||||
{token.FLOAT, "01234567890e0"},
|
||||
{token.FLOAT, "0E0"},
|
||||
{token.FLOAT, "1E0"},
|
||||
{token.FLOAT, "42E0"},
|
||||
{token.FLOAT, "01234567890E0"},
|
||||
{token.FLOAT, "0e+10"},
|
||||
{token.FLOAT, "1e-10"},
|
||||
{token.FLOAT, "42e+10"},
|
||||
{token.FLOAT, "01234567890e-10"},
|
||||
{token.FLOAT, "0E+10"},
|
||||
{token.FLOAT, "1E-10"},
|
||||
{token.FLOAT, "42E+10"},
|
||||
{token.FLOAT, "01234567890E-10"},
|
||||
{token.FLOAT, "01.8e0"},
|
||||
{token.FLOAT, "1.4e0"},
|
||||
{token.FLOAT, "42.2e0"},
|
||||
{token.FLOAT, "01234567890.12e0"},
|
||||
{token.FLOAT, "0.E0"},
|
||||
{token.FLOAT, "1.12E0"},
|
||||
{token.FLOAT, "42.123E0"},
|
||||
{token.FLOAT, "01234567890.213E0"},
|
||||
{token.FLOAT, "0.2e+10"},
|
||||
{token.FLOAT, "1.2e-10"},
|
||||
{token.FLOAT, "42.54e+10"},
|
||||
{token.FLOAT, "01234567890.98e-10"},
|
||||
{token.FLOAT, "0.1E+10"},
|
||||
{token.FLOAT, "1.1E-10"},
|
||||
{token.FLOAT, "42.1E+10"},
|
||||
{token.FLOAT, "01234567890.1E-10"},
|
||||
{token.FLOAT, "-0.0"},
|
||||
{token.FLOAT, "-1.0"},
|
||||
{token.FLOAT, "-42.0"},
|
||||
{token.FLOAT, "-01234567890.0"},
|
||||
{token.FLOAT, "-0e0"},
|
||||
{token.FLOAT, "-1e0"},
|
||||
{token.FLOAT, "-42e0"},
|
||||
{token.FLOAT, "-01234567890e0"},
|
||||
{token.FLOAT, "-0E0"},
|
||||
{token.FLOAT, "-1E0"},
|
||||
{token.FLOAT, "-42E0"},
|
||||
{token.FLOAT, "-01234567890E0"},
|
||||
{token.FLOAT, "-0e+10"},
|
||||
{token.FLOAT, "-1e-10"},
|
||||
{token.FLOAT, "-42e+10"},
|
||||
{token.FLOAT, "-01234567890e-10"},
|
||||
{token.FLOAT, "-0E+10"},
|
||||
{token.FLOAT, "-1E-10"},
|
||||
{token.FLOAT, "-42E+10"},
|
||||
{token.FLOAT, "-01234567890E-10"},
|
||||
{token.FLOAT, "-01.8e0"},
|
||||
{token.FLOAT, "-1.4e0"},
|
||||
{token.FLOAT, "-42.2e0"},
|
||||
{token.FLOAT, "-01234567890.12e0"},
|
||||
{token.FLOAT, "-0.E0"},
|
||||
{token.FLOAT, "-1.12E0"},
|
||||
{token.FLOAT, "-42.123E0"},
|
||||
{token.FLOAT, "-01234567890.213E0"},
|
||||
{token.FLOAT, "-0.2e+10"},
|
||||
{token.FLOAT, "-1.2e-10"},
|
||||
{token.FLOAT, "-42.54e+10"},
|
||||
{token.FLOAT, "-01234567890.98e-10"},
|
||||
{token.FLOAT, "-0.1E+10"},
|
||||
{token.FLOAT, "-1.1E-10"},
|
||||
{token.FLOAT, "-42.1E+10"},
|
||||
{token.FLOAT, "-01234567890.1E-10"},
|
||||
},
|
||||
}
|
||||
|
||||
var orderedTokenLists = []string{
|
||||
"comment",
|
||||
"operator",
|
||||
"bool",
|
||||
"string",
|
||||
"number",
|
||||
"float",
|
||||
}
|
||||
|
||||
func TestPosition(t *testing.T) {
|
||||
// create artifical source code
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
for _, listName := range orderedTokenLists {
|
||||
for _, ident := range tokenLists[listName] {
|
||||
fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text)
|
||||
}
|
||||
}
|
||||
|
||||
s := New(buf.Bytes())
|
||||
|
||||
pos := token.Pos{"", 4, 1, 5}
|
||||
s.Scan()
|
||||
for _, listName := range orderedTokenLists {
|
||||
|
||||
for _, k := range tokenLists[listName] {
|
||||
curPos := s.tokPos
|
||||
// fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column)
|
||||
|
||||
if curPos.Offset != pos.Offset {
|
||||
t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text)
|
||||
}
|
||||
if curPos.Line != pos.Line {
|
||||
t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text)
|
||||
}
|
||||
if curPos.Column != pos.Column {
|
||||
t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text)
|
||||
}
|
||||
pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline
|
||||
pos.Line += countNewlines(k.text) + 1 // each token is on a new line
|
||||
|
||||
s.Error = func(pos token.Pos, msg string) {
|
||||
t.Errorf("error %q for %q", msg, k.text)
|
||||
}
|
||||
|
||||
s.Scan()
|
||||
}
|
||||
}
|
||||
// make sure there were no token-internal errors reported by scanner
|
||||
if s.ErrorCount != 0 {
|
||||
t.Errorf("%d errors", s.ErrorCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComment(t *testing.T) {
|
||||
testTokenList(t, tokenLists["comment"])
|
||||
}
|
||||
|
||||
func TestOperator(t *testing.T) {
|
||||
testTokenList(t, tokenLists["operator"])
|
||||
}
|
||||
|
||||
func TestBool(t *testing.T) {
|
||||
testTokenList(t, tokenLists["bool"])
|
||||
}
|
||||
|
||||
func TestIdent(t *testing.T) {
|
||||
testTokenList(t, tokenLists["ident"])
|
||||
}
|
||||
|
||||
func TestString(t *testing.T) {
|
||||
testTokenList(t, tokenLists["string"])
|
||||
}
|
||||
|
||||
func TestNumber(t *testing.T) {
|
||||
testTokenList(t, tokenLists["number"])
|
||||
}
|
||||
|
||||
func TestFloat(t *testing.T) {
|
||||
testTokenList(t, tokenLists["float"])
|
||||
}
|
||||
|
||||
/*
|
||||
func TestRealExample(t *testing.T) {
|
||||
complexHCL := `// This comes from Terraform, as a test
|
||||
variable "foo" {
|
||||
default = "bar"
|
||||
description = "bar"
|
||||
}
|
||||
|
||||
provider "aws" {
|
||||
access_key = "foo"
|
||||
secret_key = "bar"
|
||||
}
|
||||
|
||||
resource "aws_security_group" "firewall" {
|
||||
count = 5
|
||||
}
|
||||
|
||||
resource aws_instance "web" {
|
||||
ami = "${var.foo}"
|
||||
security_groups = [
|
||||
"foo",
|
||||
"${aws_security_group.firewall.foo}"
|
||||
]
|
||||
|
||||
network_interface {
|
||||
device_index = 0
|
||||
description = "Main network interface"
|
||||
}
|
||||
}`
|
||||
|
||||
literals := []struct {
|
||||
tokenType token.Type
|
||||
literal string
|
||||
}{
|
||||
{token.COMMENT, `// This comes from Terraform, as a test`},
|
||||
{token.IDENT, `variable`},
|
||||
{token.STRING, `"foo"`},
|
||||
{token.LBRACE, `{`},
|
||||
{token.IDENT, `default`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.STRING, `"bar"`},
|
||||
{token.IDENT, `description`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.STRING, `"bar"`},
|
||||
{token.RBRACE, `}`},
|
||||
{token.IDENT, `provider`},
|
||||
{token.STRING, `"aws"`},
|
||||
{token.LBRACE, `{`},
|
||||
{token.IDENT, `access_key`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.STRING, `"foo"`},
|
||||
{token.IDENT, `secret_key`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.STRING, `"bar"`},
|
||||
{token.RBRACE, `}`},
|
||||
{token.IDENT, `resource`},
|
||||
{token.STRING, `"aws_security_group"`},
|
||||
{token.STRING, `"firewall"`},
|
||||
{token.LBRACE, `{`},
|
||||
{token.IDENT, `count`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.NUMBER, `5`},
|
||||
{token.RBRACE, `}`},
|
||||
{token.IDENT, `resource`},
|
||||
{token.IDENT, `aws_instance`},
|
||||
{token.STRING, `"web"`},
|
||||
{token.LBRACE, `{`},
|
||||
{token.IDENT, `ami`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.STRING, `"${var.foo}"`},
|
||||
{token.IDENT, `security_groups`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.LBRACK, `[`},
|
||||
{token.STRING, `"foo"`},
|
||||
{token.COMMA, `,`},
|
||||
{token.STRING, `"${aws_security_group.firewall.foo}"`},
|
||||
{token.RBRACK, `]`},
|
||||
{token.IDENT, `network_interface`},
|
||||
{token.LBRACE, `{`},
|
||||
{token.IDENT, `device_index`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.NUMBER, `0`},
|
||||
{token.IDENT, `description`},
|
||||
{token.ASSIGN, `=`},
|
||||
{token.STRING, `"Main network interface"`},
|
||||
{token.RBRACE, `}`},
|
||||
{token.RBRACE, `}`},
|
||||
{token.EOF, ``},
|
||||
}
|
||||
|
||||
s := New([]byte(complexHCL))
|
||||
for _, l := range literals {
|
||||
tok := s.Scan()
|
||||
if l.tokenType != tok.Type {
|
||||
t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String())
|
||||
}
|
||||
|
||||
if l.literal != tok.Text {
|
||||
t.Errorf("got: %s want %s\n", tok, l.literal)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
func TestError(t *testing.T) {
|
||||
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
|
||||
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
|
||||
|
||||
testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
|
||||
testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
|
||||
|
||||
testError(t, `01238`, "1:7", "numbers cannot start with 0", token.NUMBER)
|
||||
testError(t, `01238123`, "1:10", "numbers cannot start with 0", token.NUMBER)
|
||||
testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL)
|
||||
|
||||
testError(t, `"`, "1:2", "literal not terminated", token.STRING)
|
||||
testError(t, `"abc`, "1:5", "literal not terminated", token.STRING)
|
||||
testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING)
|
||||
}
|
||||
|
||||
func testError(t *testing.T, src, pos, msg string, tok token.Type) {
|
||||
s := New([]byte(src))
|
||||
|
||||
errorCalled := false
|
||||
s.Error = func(p token.Pos, m string) {
|
||||
if !errorCalled {
|
||||
if pos != p.String() {
|
||||
t.Errorf("pos = %q, want %q for %q", p, pos, src)
|
||||
}
|
||||
|
||||
if m != msg {
|
||||
t.Errorf("msg = %q, want %q for %q", m, msg, src)
|
||||
}
|
||||
errorCalled = true
|
||||
}
|
||||
}
|
||||
|
||||
tk := s.Scan()
|
||||
if tk.Type != tok {
|
||||
t.Errorf("tok = %s, want %s for %q", tk, tok, src)
|
||||
}
|
||||
if !errorCalled {
|
||||
t.Errorf("error handler not called for %q", src)
|
||||
}
|
||||
if s.ErrorCount == 0 {
|
||||
t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
|
||||
}
|
||||
}
|
||||
|
||||
func testTokenList(t *testing.T, tokenList []tokenPair) {
|
||||
// create artifical source code
|
||||
buf := new(bytes.Buffer)
|
||||
for _, ident := range tokenList {
|
||||
fmt.Fprintf(buf, "%s\n", ident.text)
|
||||
}
|
||||
|
||||
s := New(buf.Bytes())
|
||||
for _, ident := range tokenList {
|
||||
tok := s.Scan()
|
||||
if tok.Type != ident.tok {
|
||||
t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text)
|
||||
}
|
||||
|
||||
if tok.Text != ident.text {
|
||||
t.Errorf("text = %q want %q", tok.String(), ident.text)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func countNewlines(s string) int {
|
||||
n := 0
|
||||
for _, ch := range s {
|
||||
if ch == '\n' {
|
||||
n++
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
46
json/token/position.go
Normal file
46
json/token/position.go
Normal file
@ -0,0 +1,46 @@
|
||||
package token
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Pos describes an arbitrary source position
|
||||
// including the file, line, and column location.
|
||||
// A Position is valid if the line number is > 0.
|
||||
type Pos struct {
|
||||
Filename string // filename, if any
|
||||
Offset int // offset, starting at 0
|
||||
Line int // line number, starting at 1
|
||||
Column int // column number, starting at 1 (character count)
|
||||
}
|
||||
|
||||
// IsValid returns true if the position is valid.
|
||||
func (p *Pos) IsValid() bool { return p.Line > 0 }
|
||||
|
||||
// String returns a string in one of several forms:
|
||||
//
|
||||
// file:line:column valid position with file name
|
||||
// line:column valid position without file name
|
||||
// file invalid position with file name
|
||||
// - invalid position without file name
|
||||
func (p Pos) String() string {
|
||||
s := p.Filename
|
||||
if p.IsValid() {
|
||||
if s != "" {
|
||||
s += ":"
|
||||
}
|
||||
s += fmt.Sprintf("%d:%d", p.Line, p.Column)
|
||||
}
|
||||
if s == "" {
|
||||
s = "-"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Before reports whether the position p is before u.
|
||||
func (p Pos) Before(u Pos) bool {
|
||||
return u.Offset > p.Offset || u.Line > p.Line
|
||||
}
|
||||
|
||||
// After reports whether the position p is after u.
|
||||
func (p Pos) After(u Pos) bool {
|
||||
return u.Offset < p.Offset || u.Line < p.Line
|
||||
}
|
139
json/token/token.go
Normal file
139
json/token/token.go
Normal file
@ -0,0 +1,139 @@
|
||||
package token
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
hclstrconv "github.com/hashicorp/hcl/hcl/strconv"
|
||||
)
|
||||
|
||||
// Token defines a single HCL token which can be obtained via the Scanner
|
||||
type Token struct {
|
||||
Type Type
|
||||
Pos Pos
|
||||
Text string
|
||||
}
|
||||
|
||||
// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
|
||||
type Type int
|
||||
|
||||
const (
|
||||
// Special tokens
|
||||
ILLEGAL Type = iota
|
||||
EOF
|
||||
|
||||
identifier_beg
|
||||
literal_beg
|
||||
NUMBER // 12345
|
||||
FLOAT // 123.45
|
||||
BOOL // true,false
|
||||
STRING // "abc"
|
||||
NULL // null
|
||||
literal_end
|
||||
identifier_end
|
||||
|
||||
operator_beg
|
||||
LBRACK // [
|
||||
LBRACE // {
|
||||
COMMA // ,
|
||||
PERIOD // .
|
||||
|
||||
RBRACK // ]
|
||||
RBRACE // }
|
||||
|
||||
operator_end
|
||||
)
|
||||
|
||||
var tokens = [...]string{
|
||||
ILLEGAL: "ILLEGAL",
|
||||
|
||||
EOF: "EOF",
|
||||
|
||||
NUMBER: "NUMBER",
|
||||
FLOAT: "FLOAT",
|
||||
BOOL: "BOOL",
|
||||
STRING: "STRING",
|
||||
NULL: "NULL",
|
||||
|
||||
LBRACK: "LBRACK",
|
||||
LBRACE: "LBRACE",
|
||||
COMMA: "COMMA",
|
||||
PERIOD: "PERIOD",
|
||||
|
||||
RBRACK: "RBRACK",
|
||||
RBRACE: "RBRACE",
|
||||
}
|
||||
|
||||
// String returns the string corresponding to the token tok.
|
||||
func (t Type) String() string {
|
||||
s := ""
|
||||
if 0 <= t && t < Type(len(tokens)) {
|
||||
s = tokens[t]
|
||||
}
|
||||
if s == "" {
|
||||
s = "token(" + strconv.Itoa(int(t)) + ")"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// IsIdentifier returns true for tokens corresponding to identifiers and basic
|
||||
// type literals; it returns false otherwise.
|
||||
func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end }
|
||||
|
||||
// IsLiteral returns true for tokens corresponding to basic type literals; it
|
||||
// returns false otherwise.
|
||||
func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end }
|
||||
|
||||
// IsOperator returns true for tokens corresponding to operators and
|
||||
// delimiters; it returns false otherwise.
|
||||
func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end }
|
||||
|
||||
// String returns the token's literal text. Note that this is only
|
||||
// applicable for certain token types, such as token.IDENT,
|
||||
// token.STRING, etc..
|
||||
func (t Token) String() string {
|
||||
return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text)
|
||||
}
|
||||
|
||||
// Value returns the properly typed value for this token. The type of
|
||||
// the returned interface{} is guaranteed based on the Type field.
|
||||
//
|
||||
// This can only be called for literal types. If it is called for any other
|
||||
// type, this will panic.
|
||||
func (t Token) Value() interface{} {
|
||||
switch t.Type {
|
||||
case BOOL:
|
||||
if t.Text == "true" {
|
||||
return true
|
||||
} else if t.Text == "false" {
|
||||
return false
|
||||
}
|
||||
|
||||
panic("unknown bool value: " + t.Text)
|
||||
case FLOAT:
|
||||
v, err := strconv.ParseFloat(t.Text, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return float64(v)
|
||||
case NULL:
|
||||
return nil
|
||||
case NUMBER:
|
||||
v, err := strconv.ParseInt(t.Text, 0, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return int64(v)
|
||||
case STRING:
|
||||
v, err := hclstrconv.Unquote(t.Text)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("unquote %s err: %s", t.Text, err))
|
||||
}
|
||||
|
||||
return v
|
||||
default:
|
||||
panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
|
||||
}
|
||||
}
|
57
json/token/token_test.go
Normal file
57
json/token/token_test.go
Normal file
@ -0,0 +1,57 @@
|
||||
package token
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTypeString(t *testing.T) {
|
||||
var tokens = []struct {
|
||||
tt Type
|
||||
str string
|
||||
}{
|
||||
{ILLEGAL, "ILLEGAL"},
|
||||
{EOF, "EOF"},
|
||||
{NUMBER, "NUMBER"},
|
||||
{FLOAT, "FLOAT"},
|
||||
{BOOL, "BOOL"},
|
||||
{STRING, "STRING"},
|
||||
{NULL, "NULL"},
|
||||
{LBRACK, "LBRACK"},
|
||||
{LBRACE, "LBRACE"},
|
||||
{COMMA, "COMMA"},
|
||||
{PERIOD, "PERIOD"},
|
||||
{RBRACK, "RBRACK"},
|
||||
{RBRACE, "RBRACE"},
|
||||
}
|
||||
|
||||
for _, token := range tokens {
|
||||
if token.tt.String() != token.str {
|
||||
t.Errorf("want: %q got:%q\n", token.str, token.tt)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestTokenValue(t *testing.T) {
|
||||
var tokens = []struct {
|
||||
tt Token
|
||||
v interface{}
|
||||
}{
|
||||
{Token{Type: BOOL, Text: `true`}, true},
|
||||
{Token{Type: BOOL, Text: `false`}, false},
|
||||
{Token{Type: FLOAT, Text: `3.14`}, float64(3.14)},
|
||||
{Token{Type: NULL, Text: `null`}, nil},
|
||||
{Token{Type: NUMBER, Text: `42`}, int64(42)},
|
||||
{Token{Type: STRING, Text: `"foo"`}, "foo"},
|
||||
{Token{Type: STRING, Text: `"foo\nbar"`}, "foo\nbar"},
|
||||
}
|
||||
|
||||
for _, token := range tokens {
|
||||
if val := token.tt.Value(); !reflect.DeepEqual(val, token.v) {
|
||||
t.Errorf("want: %v got:%v\n", token.v, val)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user