From f5a27d4be4ee8ae949168902e16ddb849783d540 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto
Date: Sun, 8 Nov 2015 15:50:37 -0800
Subject: [PATCH] json/parser

---
 json/parser/parser.go                 | 299 ++++++++++++++++++++++++++
 json/parser/parser_test.go            | 273 +++++++++++++++++++++++
 json/parser/test-fixtures/array.json  |   4 +
 json/parser/test-fixtures/basic.json  |   3 +
 json/parser/test-fixtures/object.json |   5 +
 json/parser/test-fixtures/types.json  |  10 +
 json/token/token.go                   |  21 ++
 7 files changed, 615 insertions(+)
 create mode 100644 json/parser/parser.go
 create mode 100644 json/parser/parser_test.go
 create mode 100644 json/parser/test-fixtures/array.json
 create mode 100644 json/parser/test-fixtures/basic.json
 create mode 100644 json/parser/test-fixtures/object.json
 create mode 100644 json/parser/test-fixtures/types.json

diff --git a/json/parser/parser.go b/json/parser/parser.go
new file mode 100644
index 0000000..1f7db62
--- /dev/null
+++ b/json/parser/parser.go
@@ -0,0 +1,299 @@
+package parser
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/hashicorp/hcl/hcl/ast"
+	"github.com/hashicorp/hcl/json/scanner"
+	"github.com/hashicorp/hcl/json/token"
+)
+
+// Parser builds an HCL abstract syntax tree from JSON source.
+type Parser struct {
+	sc *scanner.Scanner
+
+	// Last read token
+	tok token.Token
+
+	enableTrace bool
+	indent      int
+	n           int // buffer size (max = 1)
+}
+
+// newParser returns a Parser that reads its tokens from src.
+func newParser(src []byte) *Parser {
+	return &Parser{
+		sc: scanner.New(src),
+	}
+}
+
+// Parse parses src and returns the resulting abstract syntax tree.
+func Parse(src []byte) (*ast.File, error) {
+	p := newParser(src)
+	return p.Parse()
+}
+
+var errEOFToken = errors.New("EOF token found")
+
+// Parse parses the parser's source and returns the resulting abstract syntax
+// tree. An error reported by the scanner takes precedence over a parse error.
+func (p *Parser) Parse() (*ast.File, error) {
+	f := &ast.File{}
+	var scerr error
+	p.sc.Error = func(pos token.Pos, msg string) {
+		scerr = fmt.Errorf("%s: %s", pos, msg)
+	}
+
+	// The root must be an object in JSON
+	object, err := p.object()
+	if scerr != nil {
+		return nil, scerr
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	// We make our final node an object list so it is more HCL compatible
+	f.Node = object.List
+	return f, nil
+}
+
+// objectList parses comma separated object items. The token that ends the
+// list is pushed back so that the caller can inspect it.
+func (p *Parser) objectList() (*ast.ObjectList, error) {
+	defer un(trace(p, "ParseObjectList"))
+	node := &ast.ObjectList{}
+
+	for {
+		n, err := p.objectItem()
+		if err == errEOFToken {
+			break // we are finished
+		}
+
+		// we don't return a nil node, because might want to use already
+		// collected items.
+		if err != nil {
+			return node, err
+		}
+
+		node.Add(n)
+
+		// Check for a followup comma. If it isn't a comma, then we're done
+		if tok := p.scan(); tok.Type != token.COMMA {
+			p.unscan()
+			break
+		}
+	}
+	return node, nil
+}
+
+// objectItem parses a single object item
+func (p *Parser) objectItem() (*ast.ObjectItem, error) {
+	defer un(trace(p, "ParseObjectItem"))
+
+	keys, err := p.objectKey()
+	if err != nil {
+		return nil, err
+	}
+
+	o := &ast.ObjectItem{
+		Keys: keys,
+	}
+
+	switch p.tok.Type {
+	case token.COLON:
+		o.Val, err = p.objectValue()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return o, nil
+}
+
+// objectKey parses the keys of an object item, up to and including the COLON
+// that separates them from the value.
+func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
+	keys := make([]*ast.ObjectKey, 0)
+
+	for {
+		tok := p.scan()
+		switch tok.Type {
+		case token.EOF:
+			return nil, errEOFToken
+		case token.STRING:
+			keys = append(keys, &ast.ObjectKey{
+				Token: p.tok.HCLToken(),
+			})
+		case token.COLON:
+			// Done
+			return keys, nil
+		case token.ILLEGAL:
+			return nil, fmt.Errorf("illegal token at %d:%d", tok.Pos.Line, tok.Pos.Column)
+		default:
+			return nil, fmt.Errorf("expected: STRING got: %s", p.tok.Type)
+		}
+	}
+}
+
+// objectValue parses the value of an object item: a literal (number, float,
+// bool, null or string), an object or a list.
+func (p *Parser) objectValue() (ast.Node, error) {
+	defer un(trace(p, "ParseObjectValue"))
+	tok := p.scan()
+
+	switch tok.Type {
+	case token.NUMBER, token.FLOAT, token.BOOL, token.NULL, token.STRING:
+		return p.literalType()
+	case token.LBRACE:
+		return p.objectType()
+	case token.LBRACK:
+		return p.listType()
+	case token.EOF:
+		return nil, errEOFToken
+	}
+
+	return nil, fmt.Errorf("Expected object value, got unknown token: %+v", tok)
+}
+
+// object parses the root of a document, which must be an object.
+func (p *Parser) object() (*ast.ObjectType, error) {
+	defer un(trace(p, "ParseType"))
+	tok := p.scan()
+
+	switch tok.Type {
+	case token.LBRACE:
+		return p.objectType()
+	case token.EOF:
+		return nil, errEOFToken
+	}
+
+	return nil, fmt.Errorf("Expected object, got unknown token: %+v", tok)
+}
+
+// objectType parses an object type and returns an ObjectType AST
+func (p *Parser) objectType() (*ast.ObjectType, error) {
+	defer un(trace(p, "ParseObjectType"))
+
+	// we assume that the currently scanned token is a LBRACE
+	o := &ast.ObjectType{}
+
+	l, err := p.objectList()
+	if err != nil {
+		// objectKey consumes the RBRACE of an empty object (or one that
+		// follows a trailing comma) and reports it as an error, so that
+		// case is fine. Anything else is a syntax error.
+		if p.tok.Type != token.RBRACE {
+			return nil, err
+		}
+	} else if tok := p.scan(); tok.Type != token.RBRACE {
+		// objectList pushed back the token that ended the list. It must
+		// be consumed here: left in the buffer, it would also end the
+		// enclosing object and drop that object's remaining items.
+		// Anything but RBRACE means the object was never closed.
+		return nil, fmt.Errorf("expected: RBRACE got: %s", tok.Type)
+	}
+
+	o.List = l
+	return o, nil
+}
+
+// listType parses a list type and returns a ListType AST
+func (p *Parser) listType() (*ast.ListType, error) {
+	defer un(trace(p, "ParseListType"))
+
+	// we assume that the currently scanned token is a LBRACK
+	l := &ast.ListType{}
+
+	for {
+		tok := p.scan()
+		switch tok.Type {
+		case token.NUMBER, token.FLOAT, token.STRING:
+			node, err := p.literalType()
+			if err != nil {
+				return nil, err
+			}
+
+			l.Add(node)
+		case token.COMMA:
+			continue
+		case token.BOOL:
+			// TODO(arslan) should we support? not supported by HCL yet
+		case token.LBRACK:
+			// TODO(arslan) should we support nested lists? Even though it's
+			// written in README of HCL, it's not a part of the grammar
+			// (not defined in parse.y)
+		case token.RBRACK:
+			// finished
+			return l, nil
+		default:
+			return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type)
+		}
+
+	}
+}
+
+// literalType parses a literal type and returns a LiteralType AST
+func (p *Parser) literalType() (*ast.LiteralType, error) {
+	defer un(trace(p, "ParseLiteral"))
+
+	return &ast.LiteralType{
+		Token: p.tok.HCLToken(),
+	}, nil
+}
+
+// scan returns the next token from the underlying scanner. If a token has
+// been unscanned then read that instead.
+func (p *Parser) scan() token.Token {
+	// If we have a token on the buffer, then return it.
+	if p.n != 0 {
+		p.n = 0
+		return p.tok
+	}
+
+	p.tok = p.sc.Scan()
+	return p.tok
+}
+
+// unscan pushes the previously read token back onto the buffer.
+func (p *Parser) unscan() {
+	p.n = 1
+}
+
+// ----------------------------------------------------------------------------
+// Parsing support
+
+// printTrace prints a, prefixed with the position of the current token and
+// indented by the current nesting depth. It does nothing unless tracing is on.
+func (p *Parser) printTrace(a ...interface{}) {
+	if !p.enableTrace {
+		return
+	}
+
+	const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
+	const n = len(dots)
+	fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
+
+	i := 2 * p.indent
+	for i > n {
+		fmt.Print(dots)
+		i -= n
+	}
+	// i <= n
+	fmt.Print(dots[0:i])
+	fmt.Println(a...)
+}
+
+// trace prints msg as the start of a traced region and increases the indent.
+func trace(p *Parser, msg string) *Parser {
+	p.printTrace(msg, "(")
+	p.indent++
+	return p
+}
+
+// un closes a traced region. Usage pattern: defer un(trace(p, "..."))
+func un(p *Parser) {
+	p.indent--
+	p.printTrace(")")
+}
diff --git a/json/parser/parser_test.go b/json/parser/parser_test.go
new file mode 100644
index 0000000..9c77daf
--- /dev/null
+++ b/json/parser/parser_test.go
@@ -0,0 +1,273 @@
+package parser
+
+import (
+	"fmt"
+	"io/ioutil"
+	"path/filepath"
+	"reflect"
+	"runtime"
+	"testing"
+
+	"github.com/hashicorp/hcl/hcl/ast"
+	"github.com/hashicorp/hcl/hcl/token"
+)
+
+func TestType(t *testing.T) {
+	var literals = []struct {
+		typ token.Type
+		src string
+	}{
+		{token.STRING, `"foo": "bar"`},
+		{token.NUMBER, `"foo": 123`},
+		{token.FLOAT, `"foo": 123.12`},
+		{token.FLOAT, `"foo": -123.12`},
+		{token.BOOL, `"foo": true`},
+		{token.STRING, `"foo": null`},
+	}
+
+	for _, l := range literals {
+		t.Logf("Testing: %s", l.src)
+
+		p := newParser([]byte(l.src))
+		item, err := p.objectItem()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		lit, ok := item.Val.(*ast.LiteralType)
+		if !ok {
+			t.Fatalf("node should be of type LiteralType, got: %T", item.Val)
+		}
+
+		if lit.Token.Type != l.typ {
+			t.Errorf("want: %s, got: %s", l.typ, lit.Token.Type)
+		}
+	}
+}
+
+func TestListType(t *testing.T) {
+	var literals = []struct {
+		src    string
+		tokens []token.Type
+	}{
+		{
+			`"foo": ["123", 123]`,
+			[]token.Type{token.STRING, token.NUMBER},
+		},
+		{
+			`"foo": [123, "123",]`,
+			[]token.Type{token.NUMBER, token.STRING},
+		},
+		{
+			`"foo": []`,
+			[]token.Type{},
+		},
+		{
+			`"foo": [123.12, "123"]`,
+			[]token.Type{token.FLOAT, token.STRING},
+		},
+	}
+
+	for _, l := range literals {
+		t.Logf("Testing: %s", l.src)
+
+		p := newParser([]byte(l.src))
+		item, err := p.objectItem()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		list, ok := item.Val.(*ast.ListType)
+		if !ok {
+			t.Fatalf("node should be of type ListType, got: %T", item.Val)
+		}
+
+		tokens := []token.Type{}
+		for _, li := range list.List {
+			if tp, ok := li.(*ast.LiteralType); ok {
+				tokens = append(tokens, tp.Token.Type)
+			}
+		}
+
+		equals(t, l.tokens, tokens)
+	}
+}
+
+func TestObjectType(t *testing.T) {
+	var literals = []struct {
+		src      string
+		nodeType []ast.Node
+		itemLen  int
+	}{
+		{
+			`"foo": {}`,
+			nil,
+			0,
+		},
+		{
+			`"foo": {
+				"bar": "fatih"
+			}`,
+			[]ast.Node{&ast.LiteralType{}},
+			1,
+		},
+		{
+			`"foo": {
+				"bar": "fatih",
+				"baz": ["arslan"]
+			}`,
+			[]ast.Node{
+				&ast.LiteralType{},
+				&ast.ListType{},
+			},
+			2,
+		},
+		{
+			`"foo": {
+				"bar": {}
+			}`,
+			[]ast.Node{
+				&ast.ObjectType{},
+			},
+			1,
+		},
+		{
+			`"foo": {
+				"bar": {},
+				"foo": true
+			}`,
+			[]ast.Node{
+				&ast.ObjectType{},
+				&ast.LiteralType{},
+			},
+			2,
+		},
+		// a key that follows a non-empty nested object must not be dropped
+		{
+			`"foo": {
+				"bar": {"baz": 1},
+				"qux": true
+			}`,
+			[]ast.Node{
+				&ast.ObjectType{},
+				&ast.LiteralType{},
+			},
+			2,
+		},
+	}
+
+	for _, l := range literals {
+		t.Logf("Testing:\n%s\n", l.src)
+
+		p := newParser([]byte(l.src))
+		// p.enableTrace = true
+		item, err := p.objectItem()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// we know that the ObjectKey name is foo for all cases, what matters
+		// is the object
+		obj, ok := item.Val.(*ast.ObjectType)
+		if !ok {
+			t.Fatalf("node should be of type ObjectType, got: %T", item.Val)
+		}
+
+		// check if the total length of items are correct
+		equals(t, l.itemLen, len(obj.List.Items))
+
+		// check if the types are correct
+		for i, item := range obj.List.Items {
+			equals(t, reflect.TypeOf(l.nodeType[i]), reflect.TypeOf(item.Val))
+		}
+	}
+}
+
+func TestObjectKey(t *testing.T) {
+	cases := []struct {
+		exp []token.Type
+		src string
+	}{
+		{[]token.Type{token.STRING}, `"foo": {}`},
+	}
+
+	for _, k := range cases {
+		p := newParser([]byte(k.src))
+		keys, err := p.objectKey()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		tokens := []token.Type{}
+		for _, o := range keys {
+			tokens = append(tokens, o.Token.Type)
+		}
+
+		equals(t, k.exp, tokens)
+	}
+
+	errKeys := []struct {
+		src string
+	}{
+		{`foo 12 {}`},
+		{`foo bar = {}`},
+		{`foo []`},
+		{`12 {}`},
+	}
+
+	for _, k := range errKeys {
+		p := newParser([]byte(k.src))
+		_, err := p.objectKey()
+		if err == nil {
+			t.Errorf("case '%s' should give an error", k.src)
+		}
+	}
+}
+
+// Official HCL tests
+func TestParse(t *testing.T) {
+	cases := []struct {
+		Name string
+		Err  bool
+	}{
+		{
+			"basic.json",
+			false,
+		},
+		{
+			"object.json",
+			false,
+		},
+		{
+			"array.json",
+			false,
+		},
+		{
+			"types.json",
+			false,
+		},
+	}
+
+	const fixtureDir = "./test-fixtures"
+
+	for _, tc := range cases {
+		d, err := ioutil.ReadFile(filepath.Join(fixtureDir, tc.Name))
+		if err != nil {
+			t.Fatalf("err: %s", err)
+		}
+
+		_, err = Parse(d)
+		if (err != nil) != tc.Err {
+			t.Fatalf("Input: %s\n\nError: %v", tc.Name, err)
+		}
+	}
+}
+
+// equals fails the test if exp is not equal to act.
+func equals(tb testing.TB, exp, act interface{}) {
+	if !reflect.DeepEqual(exp, act) {
+		_, file, line, _ := runtime.Caller(1)
+		fmt.Printf("\033[31m%s:%d:\n\n\texp: %#v\n\n\tgot: %#v\033[39m\n\n", filepath.Base(file), line, exp, act)
+		tb.FailNow()
+	}
+}
diff --git a/json/parser/test-fixtures/array.json b/json/parser/test-fixtures/array.json
new file mode 100644
index 0000000..e320f17
--- /dev/null
+++ b/json/parser/test-fixtures/array.json
@@ -0,0 +1,4 @@
+{
+	"foo": [1, 2, "bar"],
+	"bar": "baz"
+}
diff --git a/json/parser/test-fixtures/basic.json b/json/parser/test-fixtures/basic.json
new file mode 100644
index 0000000..b54bde9
--- /dev/null
+++ b/json/parser/test-fixtures/basic.json
@@ -0,0 +1,3 @@
+{
+	"foo": "bar"
+}
diff --git a/json/parser/test-fixtures/object.json b/json/parser/test-fixtures/object.json
new file mode 100644
index 0000000..72168a3
--- /dev/null
+++ b/json/parser/test-fixtures/object.json
@@ -0,0 +1,5 @@
+{
+	"foo": {
+		"bar": [1,2]
+	}
+}
diff --git a/json/parser/test-fixtures/types.json b/json/parser/test-fixtures/types.json
new file mode 100644
index 0000000..9a142a6
--- /dev/null
+++ b/json/parser/test-fixtures/types.json
@@ -0,0 +1,10 @@
+{
+	"foo": "bar",
+	"bar": 7,
+	"baz": [1,2,3],
+	"foo": -12,
+	"bar": 3.14159,
+	"foo": true,
+	"bar": false,
+	"foo": null
+}
diff --git a/json/token/token.go b/json/token/token.go
index d6d23e0..034adcc 100644
--- a/json/token/token.go
+++ b/json/token/token.go
@@ -5,6 +5,7 @@ import (
 	"strconv"
 
 	hclstrconv "github.com/hashicorp/hcl/hcl/strconv"
+	hcltoken "github.com/hashicorp/hcl/hcl/token"
 )
 
 // Token defines a single HCL token which can be obtained via the Scanner
@@ -139,3 +140,23 @@ func (t Token) Value() interface{} {
 		panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
 	}
 }
+
+// HCLToken converts this token to an HCL token.
+//
+// The token type must be a literal type or this will panic.
+func (t Token) HCLToken() hcltoken.Token {
+	switch t.Type {
+	case BOOL:
+		return hcltoken.Token{Type: hcltoken.BOOL, Text: t.Text}
+	case FLOAT:
+		return hcltoken.Token{Type: hcltoken.FLOAT, Text: t.Text}
+	case NULL:
+		return hcltoken.Token{Type: hcltoken.STRING, Text: ""}
+	case NUMBER:
+		return hcltoken.Token{Type: hcltoken.NUMBER, Text: t.Text}
+	case STRING:
+		return hcltoken.Token{Type: hcltoken.STRING, Text: t.Text}
+	default:
+		panic(fmt.Sprintf("unimplemented HCLToken for type: %s", t.Type))
+	}
+}