lexMode detects JSON

This commit is contained in:
Mitchell Hashimoto 2014-08-01 12:54:53 -07:00
parent 9fee7c831b
commit e0d7c7b378
4 changed files with 367 additions and 303 deletions

256
lex.go
View File

@ -1,257 +1,31 @@
package hcl
import (
"bytes"
"fmt"
"strconv"
"unicode"
"unicode/utf8"
)
// The parser expects the lexer to return 0 on EOF.
const lexEOF = 0
type lexModeValue byte
// The parser uses the type <prefix>Lex as a lexer. It must provide
// the methods Lex(*<prefix>SymType) int and Error(string).
type hclLex struct {
Input string
const (
lexModeUnknown lexModeValue = iota
lexModeHcl
lexModeJson
)
pos int
width int
col, line int
err error
}
// The parser calls this method to get each new token.
func (x *hclLex) Lex(yylval *hclSymType) int {
for {
c := x.next()
if c == lexEOF {
return lexEOF
}
// Ignore all whitespace except a newline which we handle
// specially later.
if unicode.IsSpace(c) {
// lexMode returns whether we're going to be parsing in JSON
// mode or HCL mode.
func lexMode(v string) lexModeValue {
for _, r := range v {
if unicode.IsSpace(r) {
continue
}
// Consume all comments
switch c {
case '#':
fallthrough
case '/':
// Starting comment
if !x.consumeComment(c) {
return lexEOF
}
continue
}
// If it is a number, lex the number
if c >= '0' && c <= '9' {
x.backup()
return x.lexNumber(yylval)
}
switch c {
case ',':
return COMMA
case '=':
return EQUAL
case '[':
return LEFTBRACKET
case ']':
return RIGHTBRACKET
case '{':
return LEFTBRACE
case '}':
return RIGHTBRACE
case '"':
return x.lexString(yylval)
default:
x.backup()
return x.lexId(yylval)
}
}
}
func (x *hclLex) consumeComment(c rune) bool {
single := c == '#'
if !single {
c = x.next()
if c != '/' && c != '*' {
x.backup()
x.createErr(fmt.Sprintf("comment expected, got '%c'", c))
return false
}
single = c == '/'
}
nested := 1
for {
c = x.next()
if c == lexEOF {
x.backup()
return true
}
// Single line comments continue until a '\n'
if single {
if c == '\n' {
return true
}
continue
}
// Multi-line comments continue until a '*/'
switch c {
case '/':
c = x.next()
if c == '*' {
nested++
if r == '{' {
return lexModeJson
} else {
x.backup()
return lexModeHcl
}
case '*':
c = x.next()
if c == '/' {
nested--
} else {
x.backup()
}
default:
// Continue
}
// If we're done with the comment, return!
if nested == 0 {
return true
}
}
}
// lexId lexes an identifier
func (x *hclLex) lexId(yylval *hclSymType) int {
var b bytes.Buffer
for {
c := x.next()
if c == lexEOF {
break
}
if unicode.IsSpace(c) {
x.backup()
break
}
if _, err := b.WriteRune(c); err != nil {
return lexEOF
}
}
yylval.str = b.String()
return IDENTIFIER
}
// lexNumber lexes out a number
func (x *hclLex) lexNumber(yylval *hclSymType) int {
var b bytes.Buffer
for {
c := x.next()
if c == lexEOF {
break
}
// No more numeric characters
if c < '0' || c > '9' {
x.backup()
break
}
if _, err := b.WriteRune(c); err != nil {
x.createErr(fmt.Sprintf("Internal error: %s", err))
return lexEOF
}
}
v, err := strconv.ParseInt(b.String(), 0, 0)
if err != nil {
x.createErr(fmt.Sprintf("Expected number: %s", err))
return lexEOF
}
yylval.num = int(v)
return NUMBER
}
// lexString extracts a string from the input
func (x *hclLex) lexString(yylval *hclSymType) int {
var b bytes.Buffer
for {
c := x.next()
if c == lexEOF {
break
}
// String end
if c == '"' {
break
}
if _, err := b.WriteRune(c); err != nil {
return lexEOF
}
}
yylval.str = b.String()
return STRING
}
// Return the next rune for the lexer.
func (x *hclLex) next() rune {
if int(x.pos) >= len(x.Input) {
x.width = 0
return lexEOF
}
r, w := utf8.DecodeRuneInString(x.Input[x.pos:])
x.width = w
x.pos += x.width
x.col += 1
if x.line == 0 {
x.line = 1
}
if r == '\n' {
x.line += 1
x.col = 0
}
return r
}
// peek returns but does not consume the next rune in the input
func (x *hclLex) peek() rune {
r := x.next()
x.backup()
return r
}
// backup steps back one rune. Can only be called once per next.
func (x *hclLex) backup() {
x.col -= 1
x.pos -= x.width
}
// createErr records the given error
func (x *hclLex) createErr(msg string) {
x.err = fmt.Errorf("Line %d, column %d: %s", x.line, x.col, msg)
}
// The parser calls this method on a parse error.
func (x *hclLex) Error(s string) {
x.createErr(s)
return lexModeUnknown
}

257
lex_hcl.go Normal file
View File

@ -0,0 +1,257 @@
package hcl
import (
"bytes"
"fmt"
"strconv"
"unicode"
"unicode/utf8"
)
// The parser expects the lexer to return 0 on EOF.
const lexEOF = 0
// The parser uses the type <prefix>Lex as a lexer. It must provide
// the methods Lex(*<prefix>SymType) int and Error(string).
type hclLex struct {
Input string
pos int
width int
col, line int
err error
}
// The parser calls this method to get each new token.
func (x *hclLex) Lex(yylval *hclSymType) int {
for {
c := x.next()
if c == lexEOF {
return lexEOF
}
// Ignore all whitespace except a newline which we handle
// specially later.
if unicode.IsSpace(c) {
continue
}
// Consume all comments
switch c {
case '#':
fallthrough
case '/':
// Starting comment
if !x.consumeComment(c) {
return lexEOF
}
continue
}
// If it is a number, lex the number
if c >= '0' && c <= '9' {
x.backup()
return x.lexNumber(yylval)
}
switch c {
case ',':
return COMMA
case '=':
return EQUAL
case '[':
return LEFTBRACKET
case ']':
return RIGHTBRACKET
case '{':
return LEFTBRACE
case '}':
return RIGHTBRACE
case '"':
return x.lexString(yylval)
default:
x.backup()
return x.lexId(yylval)
}
}
}
func (x *hclLex) consumeComment(c rune) bool {
single := c == '#'
if !single {
c = x.next()
if c != '/' && c != '*' {
x.backup()
x.createErr(fmt.Sprintf("comment expected, got '%c'", c))
return false
}
single = c == '/'
}
nested := 1
for {
c = x.next()
if c == lexEOF {
x.backup()
return true
}
// Single line comments continue until a '\n'
if single {
if c == '\n' {
return true
}
continue
}
// Multi-line comments continue until a '*/'
switch c {
case '/':
c = x.next()
if c == '*' {
nested++
} else {
x.backup()
}
case '*':
c = x.next()
if c == '/' {
nested--
} else {
x.backup()
}
default:
// Continue
}
// If we're done with the comment, return!
if nested == 0 {
return true
}
}
}
// lexId lexes an identifier
func (x *hclLex) lexId(yylval *hclSymType) int {
var b bytes.Buffer
for {
c := x.next()
if c == lexEOF {
break
}
if unicode.IsSpace(c) {
x.backup()
break
}
if _, err := b.WriteRune(c); err != nil {
return lexEOF
}
}
yylval.str = b.String()
return IDENTIFIER
}
// lexNumber lexes out a number
func (x *hclLex) lexNumber(yylval *hclSymType) int {
var b bytes.Buffer
for {
c := x.next()
if c == lexEOF {
break
}
// No more numeric characters
if c < '0' || c > '9' {
x.backup()
break
}
if _, err := b.WriteRune(c); err != nil {
x.createErr(fmt.Sprintf("Internal error: %s", err))
return lexEOF
}
}
v, err := strconv.ParseInt(b.String(), 0, 0)
if err != nil {
x.createErr(fmt.Sprintf("Expected number: %s", err))
return lexEOF
}
yylval.num = int(v)
return NUMBER
}
// lexString extracts a string from the input
func (x *hclLex) lexString(yylval *hclSymType) int {
var b bytes.Buffer
for {
c := x.next()
if c == lexEOF {
break
}
// String end
if c == '"' {
break
}
if _, err := b.WriteRune(c); err != nil {
return lexEOF
}
}
yylval.str = b.String()
return STRING
}
// Return the next rune for the lexer.
func (x *hclLex) next() rune {
if int(x.pos) >= len(x.Input) {
x.width = 0
return lexEOF
}
r, w := utf8.DecodeRuneInString(x.Input[x.pos:])
x.width = w
x.pos += x.width
x.col += 1
if x.line == 0 {
x.line = 1
}
if r == '\n' {
x.line += 1
x.col = 0
}
return r
}
// peek returns but does not consume the next rune in the input
func (x *hclLex) peek() rune {
r := x.next()
x.backup()
return r
}
// backup steps back one rune. Can only be called once per next.
func (x *hclLex) backup() {
x.col -= 1
x.pos -= x.width
}
// createErr records the given error
func (x *hclLex) createErr(msg string) {
x.err = fmt.Errorf("Line %d, column %d: %s", x.line, x.col, msg)
}
// The parser calls this method on a parse error.
func (x *hclLex) Error(s string) {
x.createErr(s)
}

81
lex_hcl_test.go Normal file
View File

@ -0,0 +1,81 @@
package hcl
import (
"io/ioutil"
"path/filepath"
"reflect"
"testing"
)
func TestLex(t *testing.T) {
cases := []struct {
Input string
Output []int
}{
{
"comment.hcl",
[]int{IDENTIFIER, EQUAL, STRING, lexEOF},
},
{
"multiple.hcl",
[]int{
IDENTIFIER, EQUAL, STRING,
IDENTIFIER, EQUAL, NUMBER,
lexEOF,
},
},
{
"list.hcl",
[]int{
IDENTIFIER, EQUAL, LEFTBRACKET,
NUMBER, COMMA, NUMBER, COMMA, STRING,
RIGHTBRACKET, lexEOF,
},
},
{
"structure_basic.hcl",
[]int{
IDENTIFIER, LEFTBRACE,
IDENTIFIER, EQUAL, NUMBER,
RIGHTBRACE, lexEOF,
},
},
{
"structure.hcl",
[]int{
IDENTIFIER, IDENTIFIER, STRING, LEFTBRACE,
IDENTIFIER, EQUAL, NUMBER,
IDENTIFIER, EQUAL, STRING,
RIGHTBRACE, lexEOF,
},
},
}
for _, tc := range cases {
d, err := ioutil.ReadFile(filepath.Join(fixtureDir, tc.Input))
if err != nil {
t.Fatalf("err: %s", err)
}
l := &hclLex{Input: string(d)}
var actual []int
for {
token := l.Lex(new(hclSymType))
actual = append(actual, token)
if token == lexEOF {
break
}
if len(actual) > 500 {
t.Fatalf("Input:%s\n\nExausted.", tc.Input)
}
}
if !reflect.DeepEqual(actual, tc.Output) {
t.Fatalf(
"Input: %s\n\nBad: %#v\n\nExpected: %#v",
tc.Input, actual, tc.Output)
}
}
}

View File

@ -1,81 +1,33 @@
package hcl
import (
"io/ioutil"
"path/filepath"
"reflect"
"testing"
)
func TestLex(t *testing.T) {
func TestLexMode(t *testing.T) {
cases := []struct {
Input string
Output []int
Mode lexModeValue
}{
{
"comment.hcl",
[]int{IDENTIFIER, EQUAL, STRING, lexEOF},
"foo",
lexModeHcl,
},
{
"multiple.hcl",
[]int{
IDENTIFIER, EQUAL, STRING,
IDENTIFIER, EQUAL, NUMBER,
lexEOF,
},
"{}",
lexModeJson,
},
{
"list.hcl",
[]int{
IDENTIFIER, EQUAL, LEFTBRACKET,
NUMBER, COMMA, NUMBER, COMMA, STRING,
RIGHTBRACKET, lexEOF,
},
},
{
"structure_basic.hcl",
[]int{
IDENTIFIER, LEFTBRACE,
IDENTIFIER, EQUAL, NUMBER,
RIGHTBRACE, lexEOF,
},
},
{
"structure.hcl",
[]int{
IDENTIFIER, IDENTIFIER, STRING, LEFTBRACE,
IDENTIFIER, EQUAL, NUMBER,
IDENTIFIER, EQUAL, STRING,
RIGHTBRACE, lexEOF,
},
" {}",
lexModeJson,
},
}
for _, tc := range cases {
d, err := ioutil.ReadFile(filepath.Join(fixtureDir, tc.Input))
if err != nil {
t.Fatalf("err: %s", err)
}
for i, tc := range cases {
actual := lexMode(tc.Input)
l := &hclLex{Input: string(d)}
var actual []int
for {
token := l.Lex(new(hclSymType))
actual = append(actual, token)
if token == lexEOF {
break
}
if len(actual) > 500 {
t.Fatalf("Input:%s\n\nExausted.", tc.Input)
}
}
if !reflect.DeepEqual(actual, tc.Output) {
t.Fatalf(
"Input: %s\n\nBad: %#v\n\nExpected: %#v",
tc.Input, actual, tc.Output)
if actual != tc.Mode {
t.Fatalf("%d: %#v", i, actual)
}
}
}