diff --git a/hcl/ast/ast.go b/hcl/ast/ast.go new file mode 100644 index 0000000..82a6b83 --- /dev/null +++ b/hcl/ast/ast.go @@ -0,0 +1,141 @@ +// Package ast declares the types used to represent syntax trees for HCL +// (HashiCorp Configuration Language) +package ast + +import ( + "github.com/hashicorp/hcl/hcl/token" +) + +// Node is an element in the abstract syntax tree. +type Node interface { + node() + Pos() token.Pos +} + +func (File) node() {} +func (ObjectList) node() {} +func (ObjectKey) node() {} +func (ObjectItem) node() {} + +func (Comment) node() {} +func (CommentGroup) node() {} +func (ObjectType) node() {} +func (LiteralType) node() {} +func (ListType) node() {} + +// File represents a single HCL file +type File struct { + Node Node // usually a *ObjectList + Comments []*CommentGroup // list of all comments in the source +} + +func (f *File) Pos() token.Pos { + return f.Node.Pos() +} + +// ObjectList represents a list of ObjectItems. An HCL file itself is an +// ObjectList. +type ObjectList struct { + Items []*ObjectItem +} + +func (o *ObjectList) Add(item *ObjectItem) { + o.Items = append(o.Items, item) +} + +func (o *ObjectList) Pos() token.Pos { + // always returns the uninitiliazed position + return o.Items[0].Pos() +} + +// ObjectItem represents a HCL Object Item. An item is represented with a key +// (or keys). It can be an assignment or an object (both normal and nested) +type ObjectItem struct { + // keys is only one length long if it's of type assignment. If it's a + // nested object it can be larger than one. In that case "assign" is + // invalid as there is no assignments for a nested object. + Keys []*ObjectKey + + // assign contains the position of "=", if any + Assign token.Pos + + // val is the item itself. It can be an object,list, number, bool or a + // string. If key length is larger than one, val can be only of type + // Object. + Val Node + + LeadComment *CommentGroup // associated lead comment + LineComment *CommentGroup // associated line comment +} + +func (o *ObjectItem) Pos() token.Pos { + return o.Keys[0].Pos() +} + +// ObjectKeys are either an identifier or of type string. +type ObjectKey struct { + Token token.Token +} + +func (o *ObjectKey) Pos() token.Pos { + return o.Token.Pos +} + +// LiteralType represents a literal of basic type. Valid types are: +// token.NUMBER, token.FLOAT, token.BOOL and token.STRING +type LiteralType struct { + Token token.Token + + // associated line comment, only when used in a list + LineComment *CommentGroup +} + +func (l *LiteralType) Pos() token.Pos { + return l.Token.Pos +} + +// ListStatement represents a HCL List type +type ListType struct { + Lbrack token.Pos // position of "[" + Rbrack token.Pos // position of "]" + List []Node // the elements in lexical order +} + +func (l *ListType) Pos() token.Pos { + return l.Lbrack +} + +func (l *ListType) Add(node Node) { + l.List = append(l.List, node) +} + +// ObjectType represents a HCL Object Type +type ObjectType struct { + Lbrace token.Pos // position of "{" + Rbrace token.Pos // position of "}" + List *ObjectList // the nodes in lexical order +} + +func (o *ObjectType) Pos() token.Pos { + return o.Lbrace +} + +// Comment node represents a single //, # style or /*- style commment +type Comment struct { + Start token.Pos // position of / or # + Text string +} + +func (c *Comment) Pos() token.Pos { + return c.Start +} + +// CommentGroup node represents a sequence of comments with no other tokens and +// no empty lines between. +type CommentGroup struct { + List []*Comment // len(List) > 0 +} + +func (c *CommentGroup) Pos() token.Pos { + return c.List[0].Pos() +} diff --git a/hcl/ast/walk.go b/hcl/ast/walk.go new file mode 100644 index 0000000..c6dc75a --- /dev/null +++ b/hcl/ast/walk.go @@ -0,0 +1,42 @@ +package ast + +import "fmt" + +// Walk traverses an AST in depth-first order: It starts by calling fn(node); +// node must not be nil. If f returns true, Walk invokes f recursively for +// each of the non-nil children of node, followed by a call of f(nil). +func Walk(node Node, fn func(Node) bool) { + if !fn(node) { + return + } + + switch n := node.(type) { + case *File: + Walk(n.Node, fn) + case *ObjectList: + for _, item := range n.Items { + Walk(item, fn) + } + case *ObjectKey: + // nothing to do + case *ObjectItem: + for _, k := range n.Keys { + Walk(k, fn) + } + Walk(n.Val, fn) + case *LiteralType: + // nothing to do + case *ListType: + for _, l := range n.List { + Walk(l, fn) + } + case *ObjectType: + for _, l := range n.List.Items { + Walk(l, fn) + } + default: + fmt.Printf(" unknown type: %T\n", n) + } + + fn(nil) +} diff --git a/hcl/parser/parser.go b/hcl/parser/parser.go new file mode 100644 index 0000000..24e878d --- /dev/null +++ b/hcl/parser/parser.go @@ -0,0 +1,385 @@ +// Package parser implements a parser for HCL (HashiCorp Configuration +// Language) +package parser + +import ( + "errors" + "fmt" + + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/hcl/hcl/scanner" + "github.com/hashicorp/hcl/hcl/token" +) + +type Parser struct { + sc *scanner.Scanner + + // Last read token + tok token.Token + commaPrev token.Token + + comments []*ast.CommentGroup + leadComment *ast.CommentGroup // last lead comment + lineComment *ast.CommentGroup // last line comment + + enableTrace bool + indent int + n int // buffer size (max = 1) +} + +func newParser(src []byte) *Parser { + return &Parser{ + sc: scanner.New(src), + } +} + +// Parse returns the fully parsed source and returns the abstract syntax tree. +func Parse(src []byte) (*ast.File, error) { + p := newParser(src) + return p.Parse() +} + +var errEofToken = errors.New("EOF token found") + +// Parse returns the fully parsed source and returns the abstract syntax tree. +func (p *Parser) Parse() (*ast.File, error) { + f := &ast.File{} + var err error + f.Node, err = p.objectList() + if err != nil { + return nil, err + } + + f.Comments = p.comments + return f, nil +} + +func (p *Parser) objectList() (*ast.ObjectList, error) { + defer un(trace(p, "ParseObjectList")) + node := &ast.ObjectList{} + + for { + n, err := p.objectItem() + if err == errEofToken { + break // we are finished + } + + // we don't return a nil node, because might want to use already + // collected items. + if err != nil { + return node, err + } + + node.Add(n) + } + return node, nil +} + +func (p *Parser) consumeComment() (comment *ast.Comment, endline int) { + endline = p.tok.Pos.Line + + // count the endline if it's multiline comment, ie starting with /* + if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' { + // don't use range here - no need to decode Unicode code points + for i := 0; i < len(p.tok.Text); i++ { + if p.tok.Text[i] == '\n' { + endline++ + } + } + } + + comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text} + p.tok = p.sc.Scan() + return +} + +func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) { + var list []*ast.Comment + endline = p.tok.Pos.Line + + for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n { + var comment *ast.Comment + comment, endline = p.consumeComment() + list = append(list, comment) + } + + // add comment group to the comments list + comments = &ast.CommentGroup{List: list} + p.comments = append(p.comments, comments) + + return +} + +// objectItem parses a single object item +func (p *Parser) objectItem() (*ast.ObjectItem, error) { + defer un(trace(p, "ParseObjectItem")) + + keys, err := p.objectKey() + if err != nil { + return nil, err + } + + o := &ast.ObjectItem{ + Keys: keys, + } + + if p.leadComment != nil { + o.LeadComment = p.leadComment + p.leadComment = nil + } + + switch p.tok.Type { + case token.ASSIGN: + o.Assign = p.tok.Pos + o.Val, err = p.object() + if err != nil { + return nil, err + } + case token.LBRACE: + o.Val, err = p.objectType() + if err != nil { + return nil, err + } + } + + // do a look-ahead for line comment + p.scan() + if o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil { + o.LineComment = p.lineComment + p.lineComment = nil + } + p.unscan() + return o, nil +} + +// objectKey parses an object key and returns a ObjectKey AST +func (p *Parser) objectKey() ([]*ast.ObjectKey, error) { + keyCount := 0 + keys := make([]*ast.ObjectKey, 0) + + for { + tok := p.scan() + switch tok.Type { + case token.EOF: + return nil, errEofToken + case token.ASSIGN: + // assignment or object only, but not nested objects. this is not + // allowed: `foo bar = {}` + if keyCount > 1 { + return nil, fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type) + } + + if keyCount == 0 { + return nil, errors.New("no keys found!!!") + } + + return keys, nil + case token.LBRACE: + // object + return keys, nil + case token.IDENT, token.STRING: + keyCount++ + keys = append(keys, &ast.ObjectKey{Token: p.tok}) + case token.ILLEGAL: + fmt.Println("illegal") + default: + return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type) + } + } +} + +// object parses any type of object, such as number, bool, string, object or +// list. +func (p *Parser) object() (ast.Node, error) { + defer un(trace(p, "ParseType")) + tok := p.scan() + + switch tok.Type { + case token.NUMBER, token.FLOAT, token.BOOL, token.STRING: + return p.literalType() + case token.LBRACE: + return p.objectType() + case token.LBRACK: + return p.listType() + case token.COMMENT: + // implement comment + case token.EOF: + return nil, errEofToken + } + + return nil, fmt.Errorf("Unknown token: %+v", tok) +} + +// objectType parses an object type and returns a ObjectType AST +func (p *Parser) objectType() (*ast.ObjectType, error) { + defer un(trace(p, "ParseObjectType")) + + // we assume that the currently scanned token is a LBRACE + o := &ast.ObjectType{ + Lbrace: p.tok.Pos, + } + + l, err := p.objectList() + + // if we hit RBRACE, we are good to go (means we parsed all Items), if it's + // not a RBRACE, it's an syntax error and we just return it. + if err != nil && p.tok.Type != token.RBRACE { + return nil, err + } + + o.List = l + o.Rbrace = p.tok.Pos // advanced via parseObjectList + return o, nil +} + +// listType parses a list type and returns a ListType AST +func (p *Parser) listType() (*ast.ListType, error) { + defer un(trace(p, "ParseListType")) + + // we assume that the currently scanned token is a LBRACK + l := &ast.ListType{ + Lbrack: p.tok.Pos, + } + + for { + tok := p.scan() + switch tok.Type { + case token.NUMBER, token.FLOAT, token.STRING: + node, err := p.literalType() + if err != nil { + return nil, err + } + + l.Add(node) + case token.COMMA: + // get next list item or we are at the end + // do a look-ahead for line comment + p.scan() + if p.lineComment != nil { + lit, ok := l.List[len(l.List)-1].(*ast.LiteralType) + if ok { + lit.LineComment = p.lineComment + l.List[len(l.List)-1] = lit + p.lineComment = nil + } + } + p.unscan() + continue + case token.BOOL: + // TODO(arslan) should we support? not supported by HCL yet + case token.LBRACK: + // TODO(arslan) should we support nested lists? Even though it's + // written in README of HCL, it's not a part of the grammar + // (not defined in parse.y) + case token.RBRACK: + // finished + l.Rbrack = p.tok.Pos + return l, nil + default: + return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type) + } + + } +} + +// literalType parses a literal type and returns a LiteralType AST +func (p *Parser) literalType() (*ast.LiteralType, error) { + defer un(trace(p, "ParseLiteral")) + + return &ast.LiteralType{ + Token: p.tok, + }, nil +} + +// scan returns the next token from the underlying scanner. If a token has +// been unscanned then read that instead. In the process, it collects any +// comment groups encountered, and remembers the last lead and line comments. +func (p *Parser) scan() token.Token { + // If we have a token on the buffer, then return it. + if p.n != 0 { + p.n = 0 + return p.tok + } + + // Otherwise read the next token from the scanner and Save it to the buffer + // in case we unscan later. + prev := p.tok + p.tok = p.sc.Scan() + + if p.tok.Type == token.COMMENT { + var comment *ast.CommentGroup + var endline int + + // fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n", + // p.tok.Pos.Line, prev.Pos.Line, endline) + if p.tok.Pos.Line == prev.Pos.Line { + // The comment is on same line as the previous token; it + // cannot be a lead comment but may be a line comment. + comment, endline = p.consumeCommentGroup(0) + if p.tok.Pos.Line != endline { + // The next token is on a different line, thus + // the last comment group is a line comment. + p.lineComment = comment + } + } + + // consume successor comments, if any + endline = -1 + for p.tok.Type == token.COMMENT { + comment, endline = p.consumeCommentGroup(1) + } + + if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE { + switch p.tok.Type { + case token.RBRACE, token.RBRACK: + // Do not count for these cases + default: + // The next token is following on the line immediately after the + // comment group, thus the last comment group is a lead comment. + p.leadComment = comment + } + } + + } + + return p.tok +} + +// unscan pushes the previously read token back onto the buffer. +func (p *Parser) unscan() { + p.n = 1 +} + +// ---------------------------------------------------------------------------- +// Parsing support + +func (p *Parser) printTrace(a ...interface{}) { + if !p.enableTrace { + return + } + + const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + const n = len(dots) + fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column) + + i := 2 * p.indent + for i > n { + fmt.Print(dots) + i -= n + } + // i <= n + fmt.Print(dots[0:i]) + fmt.Println(a...) +} + +func trace(p *Parser, msg string) *Parser { + p.printTrace(msg, "(") + p.indent++ + return p +} + +// Usage pattern: defer un(trace(p, "...")) +func un(p *Parser) { + p.indent-- + p.printTrace(")") +} diff --git a/hcl/parser/parser_test.go b/hcl/parser/parser_test.go new file mode 100644 index 0000000..ad423f8 --- /dev/null +++ b/hcl/parser/parser_test.go @@ -0,0 +1,301 @@ +package parser + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "reflect" + "runtime" + "testing" + + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/hcl/hcl/token" +) + +func TestType(t *testing.T) { + var literals = []struct { + typ token.Type + src string + }{ + {token.STRING, `foo = "foo"`}, + {token.NUMBER, `foo = 123`}, + {token.FLOAT, `foo = 123.12`}, + {token.FLOAT, `foo = -123.12`}, + {token.BOOL, `foo = true`}, + } + + for _, l := range literals { + p := newParser([]byte(l.src)) + item, err := p.objectItem() + if err != nil { + t.Error(err) + } + + lit, ok := item.Val.(*ast.LiteralType) + if !ok { + t.Errorf("node should be of type LiteralType, got: %T", item.Val) + } + + if lit.Token.Type != l.typ { + t.Errorf("want: %s, got: %s", l.typ, lit.Token.Type) + } + } +} + +func TestListType(t *testing.T) { + var literals = []struct { + src string + tokens []token.Type + }{ + { + `foo = ["123", 123]`, + []token.Type{token.STRING, token.NUMBER}, + }, + { + `foo = [123, "123",]`, + []token.Type{token.NUMBER, token.STRING}, + }, + { + `foo = []`, + []token.Type{}, + }, + { + `foo = ["123", 123]`, + []token.Type{token.STRING, token.NUMBER}, + }, + } + + for _, l := range literals { + p := newParser([]byte(l.src)) + item, err := p.objectItem() + if err != nil { + t.Error(err) + } + + list, ok := item.Val.(*ast.ListType) + if !ok { + t.Errorf("node should be of type LiteralType, got: %T", item.Val) + } + + tokens := []token.Type{} + for _, li := range list.List { + if tp, ok := li.(*ast.LiteralType); ok { + tokens = append(tokens, tp.Token.Type) + } + } + + equals(t, l.tokens, tokens) + } +} + +func TestObjectType(t *testing.T) { + var literals = []struct { + src string + nodeType []ast.Node + itemLen int + }{ + { + `foo = {}`, + nil, + 0, + }, + { + `foo = { + bar = "fatih" + }`, + []ast.Node{&ast.LiteralType{}}, + 1, + }, + { + `foo = { + bar = "fatih" + baz = ["arslan"] + }`, + []ast.Node{ + &ast.LiteralType{}, + &ast.ListType{}, + }, + 2, + }, + { + `foo = { + bar {} + }`, + []ast.Node{ + &ast.ObjectType{}, + }, + 1, + }, + { + `foo { + bar {} + foo = true + }`, + []ast.Node{ + &ast.ObjectType{}, + &ast.LiteralType{}, + }, + 2, + }, + } + + for _, l := range literals { + p := newParser([]byte(l.src)) + // p.enableTrace = true + item, err := p.objectItem() + if err != nil { + t.Error(err) + } + + // we know that the ObjectKey name is foo for all cases, what matters + // is the object + obj, ok := item.Val.(*ast.ObjectType) + if !ok { + t.Errorf("node should be of type LiteralType, got: %T", item.Val) + } + + // check if the total length of items are correct + equals(t, l.itemLen, len(obj.List.Items)) + + // check if the types are correct + for i, item := range obj.List.Items { + equals(t, reflect.TypeOf(l.nodeType[i]), reflect.TypeOf(item.Val)) + } + } +} + +func TestObjectKey(t *testing.T) { + keys := []struct { + exp []token.Type + src string + }{ + {[]token.Type{token.IDENT}, `foo {}`}, + {[]token.Type{token.IDENT}, `foo = {}`}, + {[]token.Type{token.IDENT}, `foo = bar`}, + {[]token.Type{token.IDENT}, `foo = 123`}, + {[]token.Type{token.IDENT}, `foo = "${var.bar}`}, + {[]token.Type{token.STRING}, `"foo" {}`}, + {[]token.Type{token.STRING}, `"foo" = {}`}, + {[]token.Type{token.STRING}, `"foo" = "${var.bar}`}, + {[]token.Type{token.IDENT, token.IDENT}, `foo bar {}`}, + {[]token.Type{token.IDENT, token.STRING}, `foo "bar" {}`}, + {[]token.Type{token.STRING, token.IDENT}, `"foo" bar {}`}, + {[]token.Type{token.IDENT, token.IDENT, token.IDENT}, `foo bar baz {}`}, + } + + for _, k := range keys { + p := newParser([]byte(k.src)) + keys, err := p.objectKey() + if err != nil { + t.Fatal(err) + } + + tokens := []token.Type{} + for _, o := range keys { + tokens = append(tokens, o.Token.Type) + } + + equals(t, k.exp, tokens) + } + + errKeys := []struct { + src string + }{ + {`foo 12 {}`}, + {`foo bar = {}`}, + {`foo []`}, + {`12 {}`}, + } + + for _, k := range errKeys { + p := newParser([]byte(k.src)) + _, err := p.objectKey() + if err == nil { + t.Errorf("case '%s' should give an error", k.src) + } + } +} + +// Official HCL tests +func TestParse(t *testing.T) { + cases := []struct { + Name string + Err bool + }{ + { + "assign_colon.hcl", + true, + }, + { + "comment.hcl", + false, + }, + { + "comment_single.hcl", + false, + }, + { + "empty.hcl", + false, + }, + { + "list_comma.hcl", + false, + }, + { + "multiple.hcl", + false, + }, + { + "structure.hcl", + false, + }, + { + "structure_basic.hcl", + false, + }, + { + "structure_empty.hcl", + false, + }, + { + "complex.hcl", + false, + }, + { + "assign_deep.hcl", + true, + }, + { + "types.hcl", + false, + }, + { + "array_comment.hcl", + false, + }, + } + + const fixtureDir = "./test-fixtures" + + for _, tc := range cases { + d, err := ioutil.ReadFile(filepath.Join(fixtureDir, tc.Name)) + if err != nil { + t.Fatalf("err: %s", err) + } + + _, err = Parse(d) + if (err != nil) != tc.Err { + t.Fatalf("Input: %s\n\nError: %s", tc.Name, err) + } + } +} + +// equals fails the test if exp is not equal to act. +func equals(tb testing.TB, exp, act interface{}) { + if !reflect.DeepEqual(exp, act) { + _, file, line, _ := runtime.Caller(1) + fmt.Printf("\033[31m%s:%d:\n\n\texp: %#v\n\n\tgot: %#v\033[39m\n\n", filepath.Base(file), line, exp, act) + tb.FailNow() + } +} diff --git a/hcl/parser/test-fixtures/array_comment.hcl b/hcl/parser/test-fixtures/array_comment.hcl new file mode 100644 index 0000000..78c2675 --- /dev/null +++ b/hcl/parser/test-fixtures/array_comment.hcl @@ -0,0 +1,4 @@ +foo = [ + "1", + "2", # comment +] diff --git a/hcl/parser/test-fixtures/assign_colon.hcl b/hcl/parser/test-fixtures/assign_colon.hcl new file mode 100644 index 0000000..eb5a99a --- /dev/null +++ b/hcl/parser/test-fixtures/assign_colon.hcl @@ -0,0 +1,6 @@ +resource = [{ + "foo": { + "bar": {}, + "baz": [1, 2, "foo"], + } +}] diff --git a/hcl/parser/test-fixtures/assign_deep.hcl b/hcl/parser/test-fixtures/assign_deep.hcl new file mode 100644 index 0000000..dd3151c --- /dev/null +++ b/hcl/parser/test-fixtures/assign_deep.hcl @@ -0,0 +1,5 @@ +resource = [{ + foo = [{ + bar = {} + }] +}] diff --git a/hcl/parser/test-fixtures/comment.hcl b/hcl/parser/test-fixtures/comment.hcl new file mode 100644 index 0000000..1ff7f29 --- /dev/null +++ b/hcl/parser/test-fixtures/comment.hcl @@ -0,0 +1,15 @@ +// Foo + +/* Bar */ + +/* +/* +Baz +*/ + +# Another + +# Multiple +# Lines + +foo = "bar" diff --git a/hcl/parser/test-fixtures/comment_single.hcl b/hcl/parser/test-fixtures/comment_single.hcl new file mode 100644 index 0000000..fec5601 --- /dev/null +++ b/hcl/parser/test-fixtures/comment_single.hcl @@ -0,0 +1 @@ +# Hello diff --git a/hcl/parser/test-fixtures/complex.hcl b/hcl/parser/test-fixtures/complex.hcl new file mode 100644 index 0000000..46981bb --- /dev/null +++ b/hcl/parser/test-fixtures/complex.hcl @@ -0,0 +1,39 @@ +variable "foo" { + default = "bar" + description = "bar" +} + +provider "aws" { + access_key = "foo" + secret_key = "bar" +} + +provider "do" { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" { + count = 5 +} + +resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}", + ] + network_interface = { + device_index = 0 + description = "Main network interface" + } +} + +resource "aws_instance" "db" { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + depends_on = ["aws_instance.web"] +} + +output "web_ip" { + value = "${aws_instance.web.private_ip}" +} diff --git a/hcl/parser/test-fixtures/complex_key.hcl b/hcl/parser/test-fixtures/complex_key.hcl new file mode 100644 index 0000000..0007aaf --- /dev/null +++ b/hcl/parser/test-fixtures/complex_key.hcl @@ -0,0 +1 @@ +foo.bar = "baz" diff --git a/hcl/parser/test-fixtures/empty.hcl b/hcl/parser/test-fixtures/empty.hcl new file mode 100644 index 0000000..e69de29 diff --git a/hcl/parser/test-fixtures/list.hcl b/hcl/parser/test-fixtures/list.hcl new file mode 100644 index 0000000..059d4ce --- /dev/null +++ b/hcl/parser/test-fixtures/list.hcl @@ -0,0 +1 @@ +foo = [1, 2, "foo"] diff --git a/hcl/parser/test-fixtures/list_comma.hcl b/hcl/parser/test-fixtures/list_comma.hcl new file mode 100644 index 0000000..50f4218 --- /dev/null +++ b/hcl/parser/test-fixtures/list_comma.hcl @@ -0,0 +1 @@ +foo = [1, 2, "foo",] diff --git a/hcl/parser/test-fixtures/multiple.hcl b/hcl/parser/test-fixtures/multiple.hcl new file mode 100644 index 0000000..029c54b --- /dev/null +++ b/hcl/parser/test-fixtures/multiple.hcl @@ -0,0 +1,2 @@ +foo = "bar" +key = 7 diff --git a/hcl/parser/test-fixtures/old.hcl b/hcl/parser/test-fixtures/old.hcl new file mode 100644 index 0000000..e9f77ca --- /dev/null +++ b/hcl/parser/test-fixtures/old.hcl @@ -0,0 +1,3 @@ +default = { + "eu-west-1": "ami-b1cf19c6", +} diff --git a/hcl/parser/test-fixtures/structure.hcl b/hcl/parser/test-fixtures/structure.hcl new file mode 100644 index 0000000..92592fb --- /dev/null +++ b/hcl/parser/test-fixtures/structure.hcl @@ -0,0 +1,5 @@ +// This is a test structure for the lexer +foo bar "baz" { + key = 7 + foo = "bar" +} diff --git a/hcl/parser/test-fixtures/structure_basic.hcl b/hcl/parser/test-fixtures/structure_basic.hcl new file mode 100644 index 0000000..7229a1f --- /dev/null +++ b/hcl/parser/test-fixtures/structure_basic.hcl @@ -0,0 +1,5 @@ +foo { + value = 7 + "value" = 8 + "complex::value" = 9 +} diff --git a/hcl/parser/test-fixtures/structure_empty.hcl b/hcl/parser/test-fixtures/structure_empty.hcl new file mode 100644 index 0000000..4d156dd --- /dev/null +++ b/hcl/parser/test-fixtures/structure_empty.hcl @@ -0,0 +1 @@ +resource "foo" "bar" {} diff --git a/hcl/parser/test-fixtures/types.hcl b/hcl/parser/test-fixtures/types.hcl new file mode 100644 index 0000000..cf2747e --- /dev/null +++ b/hcl/parser/test-fixtures/types.hcl @@ -0,0 +1,7 @@ +foo = "bar" +bar = 7 +baz = [1,2,3] +foo = -12 +bar = 3.14159 +foo = true +bar = false diff --git a/hcl/printer/nodes.go b/hcl/printer/nodes.go new file mode 100644 index 0000000..e40765a --- /dev/null +++ b/hcl/printer/nodes.go @@ -0,0 +1,509 @@ +package printer + +import ( + "bytes" + "fmt" + "sort" + + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/hcl/hcl/token" +) + +const ( + blank = byte(' ') + newline = byte('\n') + tab = byte('\t') + infinity = 1 << 30 // offset or line +) + +type printer struct { + cfg Config + prev token.Pos + + comments []*ast.CommentGroup // may be nil, contains all comments + standaloneComments []*ast.CommentGroup // contains all standalone comments (not assigned to any node) + + enableTrace bool + indentTrace int +} + +type ByPosition []*ast.CommentGroup + +func (b ByPosition) Len() int { return len(b) } +func (b ByPosition) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b ByPosition) Less(i, j int) bool { return b[i].Pos().Before(b[j].Pos()) } + +// collectComments comments all standalone comments which are not lead or line +// comment +func (p *printer) collectComments(node ast.Node) { + // first collect all comments. This is already stored in + // ast.File.(comments) + ast.Walk(node, func(nn ast.Node) bool { + switch t := nn.(type) { + case *ast.File: + p.comments = t.Comments + return false + } + return true + }) + + standaloneComments := make(map[token.Pos]*ast.CommentGroup, 0) + for _, c := range p.comments { + standaloneComments[c.Pos()] = c + } + + // next remove all lead and line comments from the overall comment map. + // This will give us comments which are standalone, comments which are not + // assigned to any kind of node. + ast.Walk(node, func(nn ast.Node) bool { + switch t := nn.(type) { + case *ast.LiteralType: + if t.LineComment != nil { + for _, comment := range t.LineComment.List { + if _, ok := standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } + } + case *ast.ObjectItem: + if t.LeadComment != nil { + for _, comment := range t.LeadComment.List { + if _, ok := standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } + } + + if t.LineComment != nil { + for _, comment := range t.LineComment.List { + if _, ok := standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } + } + } + + return true + }) + + for _, c := range standaloneComments { + p.standaloneComments = append(p.standaloneComments, c) + } + + sort.Sort(ByPosition(p.standaloneComments)) + +} + +// output prints creates b printable HCL output and returns it. +func (p *printer) output(n interface{}) []byte { + var buf bytes.Buffer + + switch t := n.(type) { + case *ast.File: + return p.output(t.Node) + case *ast.ObjectList: + var index int + var nextItem token.Pos + var commented bool + for { + // TODO(arslan): refactor below comment printing, we have the same in objectType + for _, c := range p.standaloneComments { + for _, comment := range c.List { + if index != len(t.Items) { + nextItem = t.Items[index].Pos() + } else { + nextItem = token.Pos{Offset: infinity, Line: infinity} + } + + if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) { + // if we hit the end add newlines so we can print the comment + if index == len(t.Items) { + buf.Write([]byte{newline, newline}) + } + + buf.WriteString(comment.Text) + + buf.WriteByte(newline) + if index != len(t.Items) { + buf.WriteByte(newline) + } + } + } + } + + if index == len(t.Items) { + break + } + + buf.Write(p.output(t.Items[index])) + if !commented && index != len(t.Items)-1 { + buf.Write([]byte{newline, newline}) + } + index++ + } + case *ast.ObjectKey: + buf.WriteString(t.Token.Text) + case *ast.ObjectItem: + p.prev = t.Pos() + buf.Write(p.objectItem(t)) + case *ast.LiteralType: + buf.WriteString(t.Token.Text) + case *ast.ListType: + buf.Write(p.list(t)) + case *ast.ObjectType: + buf.Write(p.objectType(t)) + default: + fmt.Printf(" unknown type: %T\n", n) + } + + return buf.Bytes() +} + +// objectItem returns the printable HCL form of an object item. An object type +// starts with one/multiple keys and has a value. The value might be of any +// type. +func (p *printer) objectItem(o *ast.ObjectItem) []byte { + defer un(trace(p, fmt.Sprintf("ObjectItem: %s", o.Keys[0].Token.Text))) + var buf bytes.Buffer + + if o.LeadComment != nil { + for _, comment := range o.LeadComment.List { + buf.WriteString(comment.Text) + buf.WriteByte(newline) + } + } + + for i, k := range o.Keys { + buf.WriteString(k.Token.Text) + buf.WriteByte(blank) + + // reach end of key + if i == len(o.Keys)-1 && len(o.Keys) == 1 { + buf.WriteString("=") + buf.WriteByte(blank) + } + } + + buf.Write(p.output(o.Val)) + + if o.Val.Pos().Line == o.Keys[0].Pos().Line && o.LineComment != nil { + buf.WriteByte(blank) + for _, comment := range o.LineComment.List { + buf.WriteString(comment.Text) + } + } + + return buf.Bytes() +} + +// objectType returns the printable HCL form of an object type. An object type +// begins with a brace and ends with a brace. +func (p *printer) objectType(o *ast.ObjectType) []byte { + defer un(trace(p, "ObjectType")) + var buf bytes.Buffer + buf.WriteString("{") + buf.WriteByte(newline) + + var index int + var nextItem token.Pos + var commented bool + for { + // Print stand alone comments + for _, c := range p.standaloneComments { + for _, comment := range c.List { + // if we hit the end, last item should be the brace + if index != len(o.List.Items) { + nextItem = o.List.Items[index].Pos() + } else { + nextItem = o.Rbrace + } + + if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) { + // add newline if it's between other printed nodes + if index > 0 { + commented = true + buf.WriteByte(newline) + } + + buf.Write(p.indent([]byte(comment.Text))) + buf.WriteByte(newline) + if index != len(o.List.Items) { + buf.WriteByte(newline) // do not print on the end + } + } + } + } + + if index == len(o.List.Items) { + p.prev = o.Rbrace + break + } + + // check if we have adjacent one liner items. If yes we'll going to align + // the comments. + var aligned []*ast.ObjectItem + for _, item := range o.List.Items[index:] { + // we don't group one line lists + if len(o.List.Items) == 1 { + break + } + + // one means a oneliner with out any lead comment + // two means a oneliner with lead comment + // anything else might be something else + cur := lines(string(p.objectItem(item))) + if cur > 2 { + break + } + + curPos := item.Pos() + + nextPos := token.Pos{} + if index != len(o.List.Items)-1 { + nextPos = o.List.Items[index+1].Pos() + } + + prevPos := token.Pos{} + if index != 0 { + prevPos = o.List.Items[index-1].Pos() + } + + // fmt.Println("DEBUG ----------------") + // fmt.Printf("prev = %+v prevPos: %s\n", prev, prevPos) + // fmt.Printf("cur = %+v curPos: %s\n", cur, curPos) + // fmt.Printf("next = %+v nextPos: %s\n", next, nextPos) + + if curPos.Line+1 == nextPos.Line { + aligned = append(aligned, item) + index++ + continue + } + + if curPos.Line-1 == prevPos.Line { + aligned = append(aligned, item) + index++ + + // finish if we have a new line or comment next. This happens + // if the next item is not adjacent + if curPos.Line+1 != nextPos.Line { + break + } + continue + } + + break + } + + // put newlines if the items are between other non aligned items. + // newlines are also added if there is a standalone comment already, so + // check it too + if !commented && index != len(aligned) { + buf.WriteByte(newline) + } + + if len(aligned) >= 1 { + p.prev = aligned[len(aligned)-1].Pos() + + items := p.alignedItems(aligned) + buf.Write(p.indent(items)) + } else { + p.prev = o.List.Items[index].Pos() + + buf.Write(p.indent(p.objectItem(o.List.Items[index]))) + index++ + } + + buf.WriteByte(newline) + } + + buf.WriteString("}") + return buf.Bytes() +} + +func (p *printer) alignedItems(items []*ast.ObjectItem) []byte { + var buf bytes.Buffer + + // find the longest key and value length, needed for alignment + var longestKeyLen int // longest key length + var longestValLen int // longest value length + for _, item := range items { + key := len(item.Keys[0].Token.Text) + val := len(p.output(item.Val)) + + if key > longestKeyLen { + longestKeyLen = key + } + + if val > longestValLen { + longestValLen = val + } + } + + for i, item := range items { + if item.LeadComment != nil { + for _, comment := range item.LeadComment.List { + buf.WriteString(comment.Text) + buf.WriteByte(newline) + } + } + + for i, k := range item.Keys { + keyLen := len(k.Token.Text) + buf.WriteString(k.Token.Text) + for i := 0; i < longestKeyLen-keyLen+1; i++ { + buf.WriteByte(blank) + } + + // reach end of key + if i == len(item.Keys)-1 && len(item.Keys) == 1 { + buf.WriteString("=") + buf.WriteByte(blank) + } + } + + val := p.output(item.Val) + valLen := len(val) + buf.Write(val) + + if item.Val.Pos().Line == item.Keys[0].Pos().Line && item.LineComment != nil { + for i := 0; i < longestValLen-valLen+1; i++ { + buf.WriteByte(blank) + } + + for _, comment := range item.LineComment.List { + buf.WriteString(comment.Text) + } + } + + // do not print for the last item + if i != len(items)-1 { + buf.WriteByte(newline) + } + } + + return buf.Bytes() +} + +// list returns the printable HCL form of an list type. +func (p *printer) list(l *ast.ListType) []byte { + var buf bytes.Buffer + buf.WriteString("[") + + var longestLine int + for _, item := range l.List { + // for now we assume that the list only contains literal types + if lit, ok := item.(*ast.LiteralType); ok { + lineLen := len(lit.Token.Text) + if lineLen > longestLine { + longestLine = lineLen + } + } + } + + for i, item := range l.List { + if item.Pos().Line != l.Lbrack.Line { + // multiline list, add newline before we add each item + buf.WriteByte(newline) + // also indent each line + val := p.output(item) + curLen := len(val) + buf.Write(p.indent(val)) + buf.WriteString(",") + + if lit, ok := item.(*ast.LiteralType); ok && lit.LineComment != nil { + // if the next item doesn't have any comments, do not align + buf.WriteByte(blank) // align one space + if i != len(l.List)-1 { + if lit, ok := l.List[i+1].(*ast.LiteralType); ok && lit.LineComment != nil { + for i := 0; i < longestLine-curLen; i++ { + buf.WriteByte(blank) + } + } + } + + for _, comment := range lit.LineComment.List { + buf.WriteString(comment.Text) + } + } + + if i == len(l.List)-1 { + buf.WriteByte(newline) + } + } else { + buf.Write(p.output(item)) + if i != len(l.List)-1 { + buf.WriteString(",") + buf.WriteByte(blank) + } + } + + } + + buf.WriteString("]") + return buf.Bytes() +} + +// indent indents the lines of the given buffer for each non-empty line +func (p *printer) indent(buf []byte) []byte { + var prefix []byte + if p.cfg.SpacesWidth != 0 { + for i := 0; i < p.cfg.SpacesWidth; i++ { + prefix = append(prefix, blank) + } + } else { + prefix = []byte{tab} + } + + var res []byte + bol := true + for _, c := range buf { + if bol && c != '\n' { + res = append(res, prefix...) + } + res = append(res, c) + bol = c == '\n' + } + return res +} + +func lines(txt string) int { + endline := 1 + for i := 0; i < len(txt); i++ { + if txt[i] == '\n' { + endline++ + } + } + return endline +} + +// ---------------------------------------------------------------------------- +// Tracing support + +func (p *printer) printTrace(a ...interface{}) { + if !p.enableTrace { + return + } + + const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + const n = len(dots) + i := 2 * p.indentTrace + for i > n { + fmt.Print(dots) + i -= n + } + // i <= n + fmt.Print(dots[0:i]) + fmt.Println(a...) +} + +func trace(p *printer, msg string) *printer { + p.printTrace(msg, "(") + p.indentTrace++ + return p +} + +// Usage pattern: defer un(trace(p, "...")) +func un(p *printer) { + p.indentTrace-- + p.printTrace(")") +} diff --git a/hcl/printer/printer.go b/hcl/printer/printer.go new file mode 100644 index 0000000..38266ad --- /dev/null +++ b/hcl/printer/printer.go @@ -0,0 +1,64 @@ +// Package printer implements printing of AST nodes to HCL format. +package printer + +import ( + "bytes" + "io" + "text/tabwriter" + + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/hcl/hcl/parser" +) + +var DefaultConfig = Config{ + SpacesWidth: 2, +} + +// A Config node controls the output of Fprint. +type Config struct { + SpacesWidth int // if set, it will use spaces instead of tabs for alignment +} + +func (c *Config) Fprint(output io.Writer, node ast.Node) error { + p := &printer{ + cfg: *c, + comments: make([]*ast.CommentGroup, 0), + standaloneComments: make([]*ast.CommentGroup, 0), + // enableTrace: true, + } + + p.collectComments(node) + + if _, err := output.Write(p.output(node)); err != nil { + return err + } + + // flush tabwriter, if any + var err error + if tw, _ := output.(*tabwriter.Writer); tw != nil { + err = tw.Flush() + } + + return err +} + +// Fprint "pretty-prints" an HCL node to output +// It calls Config.Fprint with default settings. +func Fprint(output io.Writer, node ast.Node) error { + return DefaultConfig.Fprint(output, node) +} + +// Format formats src HCL and returns the result. +func Format(src []byte) ([]byte, error) { + node, err := parser.Parse(src) + if err != nil { + return nil, err + } + + var buf bytes.Buffer + if err := DefaultConfig.Fprint(&buf, node); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} diff --git a/hcl/printer/printer_test.go b/hcl/printer/printer_test.go new file mode 100644 index 0000000..86aa946 --- /dev/null +++ b/hcl/printer/printer_test.go @@ -0,0 +1,143 @@ +package printer + +import ( + "bytes" + "errors" + "flag" + "fmt" + "io/ioutil" + "path/filepath" + "testing" + + "github.com/hashicorp/hcl/hcl/parser" +) + +var update = flag.Bool("update", false, "update golden files") + +const ( + dataDir = "testdata" +) + +type entry struct { + source, golden string +} + +// Use go test -update to create/update the respective golden files. +var data = []entry{ + {"complexhcl.input", "complexhcl.golden"}, + {"list.input", "list.golden"}, + {"comment.input", "comment.golden"}, + {"comment_aligned.input", "comment_aligned.golden"}, + {"comment_standalone.input", "comment_standalone.golden"}, +} + +func TestFiles(t *testing.T) { + for _, e := range data { + source := filepath.Join(dataDir, e.source) + golden := filepath.Join(dataDir, e.golden) + check(t, source, golden) + } +} + +func check(t *testing.T, source, golden string) { + src, err := ioutil.ReadFile(source) + if err != nil { + t.Error(err) + return + } + + res, err := format(src) + if err != nil { + t.Error(err) + return + } + + // update golden files if necessary + if *update { + if err := ioutil.WriteFile(golden, res, 0644); err != nil { + t.Error(err) + } + return + } + + // get golden + gld, err := ioutil.ReadFile(golden) + if err != nil { + t.Error(err) + return + } + + // formatted source and golden must be the same + if err := diff(source, golden, res, gld); err != nil { + t.Error(err) + return + } +} + +// diff compares a and b. +func diff(aname, bname string, a, b []byte) error { + var buf bytes.Buffer // holding long error message + + // compare lengths + if len(a) != len(b) { + fmt.Fprintf(&buf, "\nlength changed: len(%s) = %d, len(%s) = %d", aname, len(a), bname, len(b)) + } + + // compare contents + line := 1 + offs := 1 + for i := 0; i < len(a) && i < len(b); i++ { + ch := a[i] + if ch != b[i] { + fmt.Fprintf(&buf, "\n%s:%d:%d: %s", aname, line, i-offs+1, lineAt(a, offs)) + fmt.Fprintf(&buf, "\n%s:%d:%d: %s", bname, line, i-offs+1, lineAt(b, offs)) + fmt.Fprintf(&buf, "\n\n") + break + } + if ch == '\n' { + line++ + offs = i + 1 + } + } + + if buf.Len() > 0 { + return errors.New(buf.String()) + } + return nil +} + +// format parses src, prints the corresponding AST, verifies the resulting +// src is syntactically correct, and returns the resulting src or an error +// if any. +func format(src []byte) ([]byte, error) { + // parse src + node, err := parser.Parse(src) + if err != nil { + return nil, fmt.Errorf("parse: %s\n%s", err, src) + } + + var buf bytes.Buffer + + cfg := &Config{} + if err := cfg.Fprint(&buf, node); err != nil { + return nil, fmt.Errorf("print: %s", err) + } + + // make sure formatted output is syntactically correct + res := buf.Bytes() + + if _, err := parser.Parse(src); err != nil { + return nil, fmt.Errorf("parse: %s\n%s", err, src) + } + + return res, nil +} + +// lineAt returns the line in text starting at offset offs. +func lineAt(text []byte, offs int) []byte { + i := offs + for i < len(text) && text[i] != '\n' { + i++ + } + return text[offs:i] +} diff --git a/hcl/printer/testdata/comment.golden b/hcl/printer/testdata/comment.golden new file mode 100644 index 0000000..8263733 --- /dev/null +++ b/hcl/printer/testdata/comment.golden @@ -0,0 +1,36 @@ +// A standalone comment is a comment which is not attached to any kind of node + +// This comes from Terraform, as a test +variable "foo" { + # Standalone comment should be still here + + default = "bar" + description = "bar" # yooo +} + +/* This is a multi line standalone +comment*/ + +// fatih arslan +/* This is a developer test +account and a multine comment */ +developer = ["fatih", "arslan"] // fatih arslan + +# One line here +numbers = [1, 2] // another line here + +# Another comment +variable = { + description = "bar" # another yooo + + foo = { + # Nested standalone + + bar = "fatih" + } +} + +// lead comment +foo = { + bar = "fatih" // line comment 2 +} // line comment 3 \ No newline at end of file diff --git a/hcl/printer/testdata/comment.input b/hcl/printer/testdata/comment.input new file mode 100644 index 0000000..57c37ac --- /dev/null +++ b/hcl/printer/testdata/comment.input @@ -0,0 +1,37 @@ +// A standalone comment is a comment which is not attached to any kind of node + + // This comes from Terraform, as a test +variable "foo" { + # Standalone comment should be still here + + default = "bar" + description = "bar" # yooo +} + +/* This is a multi line standalone +comment*/ + + +// fatih arslan +/* This is a developer test +account and a multine comment */ +developer = [ "fatih", "arslan"] // fatih arslan + +# One line here +numbers = [1,2] // another line here + + # Another comment +variable = { + description = "bar" # another yooo + foo { + # Nested standalone + + bar = "fatih" + } +} + + // lead comment +foo { + bar = "fatih" // line comment 2 +} // line comment 3 + diff --git a/hcl/printer/testdata/comment_aligned.golden b/hcl/printer/testdata/comment_aligned.golden new file mode 100644 index 0000000..da8f9d0 --- /dev/null +++ b/hcl/printer/testdata/comment_aligned.golden @@ -0,0 +1,25 @@ +aligned = { + # We have some aligned items below + foo = "fatih" # yoo1 + default = "bar" # yoo2 + bar = "bar and foo" # yoo3 + + default = { + bar = "example" + } + + #deneme arslan + fatih = ["fatih"] # yoo4 + + #fatih arslan + fatiharslan = ["arslan"] // yoo5 + + default = { + bar = "example" + } + + security_groups = [ + "foo", # kenya 1 + "${aws_security_group.firewall.foo}", # kenya 2 + ] +} \ No newline at end of file diff --git a/hcl/printer/testdata/comment_aligned.input b/hcl/printer/testdata/comment_aligned.input new file mode 100644 index 0000000..1c6ba0d --- /dev/null +++ b/hcl/printer/testdata/comment_aligned.input @@ -0,0 +1,21 @@ +aligned { +# We have some aligned items below + foo = "fatih" # yoo1 + default = "bar" # yoo2 + bar = "bar and foo" # yoo3 + default = { + bar = "example" + } + #deneme arslan + fatih = ["fatih"] # yoo4 + #fatih arslan + fatiharslan = ["arslan"] // yoo5 + default = { + bar = "example" + } + +security_groups = [ + "foo", # kenya 1 + "${aws_security_group.firewall.foo}", # kenya 2 +] +} diff --git a/hcl/printer/testdata/comment_standalone.golden b/hcl/printer/testdata/comment_standalone.golden new file mode 100644 index 0000000..a1d28ef --- /dev/null +++ b/hcl/printer/testdata/comment_standalone.golden @@ -0,0 +1,16 @@ +// A standalone comment + +aligned = { + # Standalone 1 + + a = "bar" # yoo1 + default = "bar" # yoo2 + + # Standalone 2 +} + +# Standalone 3 + +numbers = [1, 2] // another line here + +# Standalone 4 diff --git a/hcl/printer/testdata/comment_standalone.input b/hcl/printer/testdata/comment_standalone.input new file mode 100644 index 0000000..4436cb1 --- /dev/null +++ b/hcl/printer/testdata/comment_standalone.input @@ -0,0 +1,16 @@ +// A standalone comment + +aligned { + # Standalone 1 + + a = "bar" # yoo1 + default = "bar" # yoo2 + + # Standalone 2 +} + + # Standalone 3 + +numbers = [1,2] // another line here + + # Standalone 4 diff --git a/hcl/printer/testdata/complexhcl.golden b/hcl/printer/testdata/complexhcl.golden new file mode 100644 index 0000000..1639944 --- /dev/null +++ b/hcl/printer/testdata/complexhcl.golden @@ -0,0 +1,44 @@ +variable "foo" { + default = "bar" + description = "bar" +} + +developer = ["fatih", "arslan"] + +provider "aws" { + access_key = "foo" + secret_key = "bar" +} + +provider "do" { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" { + count = 5 +} + +resource aws_instance "web" { + ami = "${var.foo}" + + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}", + ] + + network_interface = { + device_index = 0 + description = "Main network interface" + } +} + +resource "aws_instance" "db" { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + + depends_on = ["aws_instance.web"] +} + +output "web_ip" { + value = "${aws_instance.web.private_ip}" +} \ No newline at end of file diff --git a/hcl/printer/testdata/complexhcl.input b/hcl/printer/testdata/complexhcl.input new file mode 100644 index 0000000..899f5fc --- /dev/null +++ b/hcl/printer/testdata/complexhcl.input @@ -0,0 +1,44 @@ +variable "foo" { + default = "bar" + description = "bar" +} + +developer = [ "fatih", "arslan"] + +provider "aws" { + access_key ="foo" + secret_key = "bar" +} + + provider "do" { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" { + count = 5 + } + + resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } + } + +resource "aws_instance" "db" { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + + depends_on = ["aws_instance.web"] +} + +output "web_ip" { + + value="${aws_instance.web.private_ip}" +} diff --git a/hcl/printer/testdata/list.golden b/hcl/printer/testdata/list.golden new file mode 100644 index 0000000..385912f --- /dev/null +++ b/hcl/printer/testdata/list.golden @@ -0,0 +1,27 @@ +foo = ["fatih", "arslan"] + +foo = ["bar", "qaz"] + +foo = ["zeynep", + "arslan", +] + +foo = ["fatih", "zeynep", + "arslan", +] + +foo = [ + "vim-go", + "golang", + "hcl", +] + +foo = [] + +foo = [1, 2, 3, 4] + +foo = [ + "kenya", + "ethiopia", + "columbia", +] \ No newline at end of file diff --git a/hcl/printer/testdata/list.input b/hcl/printer/testdata/list.input new file mode 100644 index 0000000..dd68fdd --- /dev/null +++ b/hcl/printer/testdata/list.input @@ -0,0 +1,21 @@ +foo = ["fatih", "arslan" ] + +foo = [ "bar", "qaz", ] + +foo = [ "zeynep", +"arslan", ] + +foo = ["fatih", "zeynep", +"arslan", ] + +foo = [ + "vim-go", + "golang", "hcl"] + +foo = [] + +foo = [1, 2,3, 4] + +foo = [ + "kenya", "ethiopia", + "columbia"] diff --git a/hcl/scanner/scanner.go b/hcl/scanner/scanner.go new file mode 100644 index 0000000..32a1864 --- /dev/null +++ b/hcl/scanner/scanner.go @@ -0,0 +1,529 @@ +// Package scanner implements a scanner for HCL (HashiCorp Configuration +// Language) source text. +package scanner + +import ( + "bytes" + "fmt" + "os" + "unicode" + "unicode/utf8" + + "github.com/hashicorp/hcl/hcl/token" +) + +// eof represents a marker rune for the end of the reader. +const eof = rune(0) + +// Scanner defines a lexical scanner +type Scanner struct { + buf *bytes.Buffer // Source buffer for advancing and scanning + src []byte // Source buffer for immutable access + + // Source Position + srcPos token.Pos // current position + prevPos token.Pos // previous position, used for peek() method + + lastCharLen int // length of last character in bytes + lastLineLen int // length of last line in characters (for correct column reporting) + + tokStart int // token text start position + tokEnd int // token text end position + + // Error is called for each error encountered. If no Error + // function is set, the error is reported to os.Stderr. + Error func(pos token.Pos, msg string) + + // ErrorCount is incremented by one for each error encountered. + ErrorCount int + + // tokPos is the start position of most recently scanned token; set by + // Scan. The Filename field is always left untouched by the Scanner. If + // an error is reported (via Error) and Position is invalid, the scanner is + // not inside a token. + tokPos token.Pos +} + +// New creates and initializes a new instance of Scanner using src as +// its source content. +func New(src []byte) *Scanner { + // even though we accept a src, we read from a io.Reader compatible type + // (*bytes.Buffer). So in the future we might easily change it to streaming + // read. + b := bytes.NewBuffer(src) + s := &Scanner{ + buf: b, + src: src, + } + + // srcPosition always starts with 1 + s.srcPos.Line = 1 + return s +} + +// next reads the next rune from the bufferred reader. Returns the rune(0) if +// an error occurs (or io.EOF is returned). +func (s *Scanner) next() rune { + ch, size, err := s.buf.ReadRune() + if err != nil { + // advance for error reporting + s.srcPos.Column++ + s.srcPos.Offset += size + s.lastCharLen = size + return eof + } + + if ch == utf8.RuneError && size == 1 { + s.srcPos.Column++ + s.srcPos.Offset += size + s.lastCharLen = size + s.err("illegal UTF-8 encoding") + return ch + } + + // remember last position + s.prevPos = s.srcPos + + s.srcPos.Column++ + s.lastCharLen = size + s.srcPos.Offset += size + + if ch == '\n' { + s.srcPos.Line++ + s.lastLineLen = s.srcPos.Column + s.srcPos.Column = 0 + } + + // debug + // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column) + return ch +} + +// unread unreads the previous read Rune and updates the source position +func (s *Scanner) unread() { + if err := s.buf.UnreadRune(); err != nil { + panic(err) // this is user fault, we should catch it + } + s.srcPos = s.prevPos // put back last position +} + +// peek returns the next rune without advancing the reader. +func (s *Scanner) peek() rune { + peek, _, err := s.buf.ReadRune() + if err != nil { + return eof + } + + s.buf.UnreadRune() + return peek +} + +// Scan scans the next token and returns the token. +func (s *Scanner) Scan() token.Token { + ch := s.next() + + // skip white space + for isWhitespace(ch) { + ch = s.next() + } + + var tok token.Type + + // token text markings + s.tokStart = s.srcPos.Offset - s.lastCharLen + + // token position, initial next() is moving the offset by one(size of rune + // actually), though we are interested with the starting point + s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen + if s.srcPos.Column > 0 { + // common case: last character was not a '\n' + s.tokPos.Line = s.srcPos.Line + s.tokPos.Column = s.srcPos.Column + } else { + // last character was a '\n' + // (we cannot be at the beginning of the source + // since we have called next() at least once) + s.tokPos.Line = s.srcPos.Line - 1 + s.tokPos.Column = s.lastLineLen + } + + switch { + case isLetter(ch): + tok = token.IDENT + lit := s.scanIdentifier() + if lit == "true" || lit == "false" { + tok = token.BOOL + } + case isDecimal(ch): + tok = s.scanNumber(ch) + default: + switch ch { + case eof: + tok = token.EOF + case '"': + tok = token.STRING + s.scanString() + case '#', '/': + tok = token.COMMENT + s.scanComment(ch) + case '.': + tok = token.PERIOD + ch = s.peek() + if isDecimal(ch) { + tok = token.FLOAT + ch = s.scanMantissa(ch) + ch = s.scanExponent(ch) + } + case '[': + tok = token.LBRACK + case ']': + tok = token.RBRACK + case '{': + tok = token.LBRACE + case '}': + tok = token.RBRACE + case ',': + tok = token.COMMA + case '=': + tok = token.ASSIGN + case '+': + tok = token.ADD + case '-': + if isDecimal(s.peek()) { + ch := s.next() + tok = s.scanNumber(ch) + } else { + tok = token.SUB + } + default: + s.err("illegal char") + } + } + + // finish token ending + s.tokEnd = s.srcPos.Offset + + // create token literal + var tokenText string + if s.tokStart >= 0 { + tokenText = string(s.src[s.tokStart:s.tokEnd]) + } + s.tokStart = s.tokEnd // ensure idempotency of tokenText() call + + return token.Token{ + Type: tok, + Pos: s.tokPos, + Text: tokenText, + } +} + +func (s *Scanner) scanComment(ch rune) { + // single line comments + if ch == '#' || (ch == '/' && s.peek() != '*') { + ch = s.next() + for ch != '\n' && ch >= 0 { + ch = s.next() + } + s.unread() + return + } + + // be sure we get the character after /* This allows us to find comment's + // that are not erminated + if ch == '/' { + s.next() + ch = s.next() // read character after "/*" + } + + // look for /* - style comments + for { + if ch < 0 || ch == eof { + s.err("comment not terminated") + break + } + + ch0 := ch + ch = s.next() + if ch0 == '*' && ch == '/' { + break + } + } +} + +// scanNumber scans a HCL number definition starting with the given rune +func (s *Scanner) scanNumber(ch rune) token.Type { + if ch == '0' { + // check for hexadecimal, octal or float + ch = s.next() + if ch == 'x' || ch == 'X' { + // hexadecimal + ch = s.next() + found := false + for isHexadecimal(ch) { + ch = s.next() + found = true + } + + if !found { + s.err("illegal hexadecimal number") + } + + if ch != eof { + s.unread() + } + + return token.NUMBER + } + + // now it's either something like: 0421(octal) or 0.1231(float) + illegalOctal := false + for isDecimal(ch) { + ch = s.next() + if ch == '8' || ch == '9' { + // this is just a possibility. For example 0159 is illegal, but + // 0159.23 is valid. So we mark a possible illegal octal. If + // the next character is not a period, we'll print the error. + illegalOctal = true + } + } + + // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. + if ch == 'e' || ch == 'E' { + ch = s.scanExponent(ch) + return token.NUMBER + } + + if ch == '.' { + ch = s.scanFraction(ch) + + if ch == 'e' || ch == 'E' { + ch = s.next() + ch = s.scanExponent(ch) + } + return token.FLOAT + } + + if illegalOctal { + s.err("illegal octal number") + } + + if ch != eof { + s.unread() + } + return token.NUMBER + } + + s.scanMantissa(ch) + ch = s.next() // seek forward + // literals of form 1e10 are treates as Numbers in HCL, which differs from Go. + if ch == 'e' || ch == 'E' { + ch = s.scanExponent(ch) + return token.NUMBER + } + + if ch == '.' { + ch = s.scanFraction(ch) + if ch == 'e' || ch == 'E' { + ch = s.next() + ch = s.scanExponent(ch) + } + return token.FLOAT + } + + if ch != eof { + s.unread() + } + return token.NUMBER +} + +// scanMantissa scans the mantissa begining from the rune. It returns the next +// non decimal rune. It's used to determine wheter it's a fraction or exponent. +func (s *Scanner) scanMantissa(ch rune) rune { + scanned := false + for isDecimal(ch) { + ch = s.next() + scanned = true + } + + if scanned && ch != eof { + s.unread() + } + return ch +} + +// scanFraction scans the fraction after the '.' rune +func (s *Scanner) scanFraction(ch rune) rune { + if ch == '.' { + ch = s.peek() // we peek just to see if we can move forward + ch = s.scanMantissa(ch) + } + return ch +} + +// scanExponent scans the remaining parts of an exponent after the 'e' or 'E' +// rune. +func (s *Scanner) scanExponent(ch rune) rune { + if ch == 'e' || ch == 'E' { + ch = s.next() + if ch == '-' || ch == '+' { + ch = s.next() + } + ch = s.scanMantissa(ch) + } + return ch +} + +// scanString scans a quoted string +func (s *Scanner) scanString() { + for { + // '"' opening already consumed + // read character after quote + ch := s.next() + + if ch == '\n' || ch < 0 || ch == eof { + s.err("literal not terminated") + return + } + + if ch == '"' { + break + } + + if ch == '\\' { + s.scanEscape() + } + } + + return +} + +// scanEscape scans an escape sequence +func (s *Scanner) scanEscape() rune { + // http://en.cppreference.com/w/cpp/language/escape + ch := s.next() // read character after '/' + switch ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': + // nothing to do + case '0', '1', '2', '3', '4', '5', '6', '7': + // octal notation + ch = s.scanDigits(ch, 8, 3) + case 'x': + // hexademical notation + ch = s.scanDigits(s.next(), 16, 2) + case 'u': + // universal character name + ch = s.scanDigits(s.next(), 16, 4) + case 'U': + // universal character name + ch = s.scanDigits(s.next(), 16, 8) + default: + s.err("illegal char escape") + } + return ch +} + +// scanDigits scans a rune with the given base for n times. For example an +// octal notation \184 would yield in scanDigits(ch, 8, 3) +func (s *Scanner) scanDigits(ch rune, base, n int) rune { + for n > 0 && digitVal(ch) < base { + ch = s.next() + n-- + } + if n > 0 { + s.err("illegal char escape") + } + + // we scanned all digits, put the last non digit char back + s.unread() + return ch +} + +// scanIdentifier scans an identifier and returns the literal string +func (s *Scanner) scanIdentifier() string { + offs := s.srcPos.Offset - s.lastCharLen + ch := s.next() + for isLetter(ch) || isDigit(ch) { + ch = s.next() + } + + if ch != eof { + s.unread() // we got identifier, put back latest char + } + + return string(s.src[offs:s.srcPos.Offset]) +} + +// recentPosition returns the position of the character immediately after the +// character or token returned by the last call to Scan. +func (s *Scanner) recentPosition() (pos token.Pos) { + pos.Offset = s.srcPos.Offset - s.lastCharLen + switch { + case s.srcPos.Column > 0: + // common case: last character was not a '\n' + pos.Line = s.srcPos.Line + pos.Column = s.srcPos.Column + case s.lastLineLen > 0: + // last character was a '\n' + // (we cannot be at the beginning of the source + // since we have called next() at least once) + pos.Line = s.srcPos.Line - 1 + pos.Column = s.lastLineLen + default: + // at the beginning of the source + pos.Line = 1 + pos.Column = 1 + } + return +} + +// err prints the error of any scanning to s.Error function. If the function is +// not defined, by default it prints them to os.Stderr +func (s *Scanner) err(msg string) { + s.ErrorCount++ + pos := s.recentPosition() + + if s.Error != nil { + s.Error(pos, msg) + return + } + + fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg) +} + +// isHexadecimal returns true if the given rune is a letter +func isLetter(ch rune) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) +} + +// isHexadecimal returns true if the given rune is a decimal digit +func isDigit(ch rune) bool { + return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) +} + +// isHexadecimal returns true if the given rune is a decimal number +func isDecimal(ch rune) bool { + return '0' <= ch && ch <= '9' +} + +// isHexadecimal returns true if the given rune is an hexadecimal number +func isHexadecimal(ch rune) bool { + return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' +} + +// isWhitespace returns true if the rune is a space, tab, newline or carriage return +func isWhitespace(ch rune) bool { + return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' +} + +// digitVal returns the integer value of a given octal,decimal or hexadecimal rune +func digitVal(ch rune) int { + switch { + case '0' <= ch && ch <= '9': + return int(ch - '0') + case 'a' <= ch && ch <= 'f': + return int(ch - 'a' + 10) + case 'A' <= ch && ch <= 'F': + return int(ch - 'A' + 10) + } + return 16 // larger than any legal digit val +} diff --git a/hcl/scanner/scanner_test.go b/hcl/scanner/scanner_test.go new file mode 100644 index 0000000..9f27b0d --- /dev/null +++ b/hcl/scanner/scanner_test.go @@ -0,0 +1,486 @@ +package scanner + +import ( + "bytes" + "fmt" + "testing" + + "github.com/hashicorp/hcl/hcl/token" +) + +var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + +type tokenPair struct { + tok token.Type + text string +} + +var tokenLists = map[string][]tokenPair{ + "comment": []tokenPair{ + {token.COMMENT, "//"}, + {token.COMMENT, "////"}, + {token.COMMENT, "// comment"}, + {token.COMMENT, "// /* comment */"}, + {token.COMMENT, "// // comment //"}, + {token.COMMENT, "//" + f100}, + {token.COMMENT, "#"}, + {token.COMMENT, "##"}, + {token.COMMENT, "# comment"}, + {token.COMMENT, "# /* comment */"}, + {token.COMMENT, "# # comment #"}, + {token.COMMENT, "#" + f100}, + {token.COMMENT, "/**/"}, + {token.COMMENT, "/***/"}, + {token.COMMENT, "/* comment */"}, + {token.COMMENT, "/* // comment */"}, + {token.COMMENT, "/* /* comment */"}, + {token.COMMENT, "/*\n comment\n*/"}, + {token.COMMENT, "/*" + f100 + "*/"}, + }, + "operator": []tokenPair{ + {token.LBRACK, "["}, + {token.LBRACE, "{"}, + {token.COMMA, ","}, + {token.PERIOD, "."}, + {token.RBRACK, "]"}, + {token.RBRACE, "}"}, + {token.ASSIGN, "="}, + {token.ADD, "+"}, + {token.SUB, "-"}, + }, + "bool": []tokenPair{ + {token.BOOL, "true"}, + {token.BOOL, "false"}, + }, + "ident": []tokenPair{ + {token.IDENT, "a"}, + {token.IDENT, "a0"}, + {token.IDENT, "foobar"}, + {token.IDENT, "abc123"}, + {token.IDENT, "LGTM"}, + {token.IDENT, "_"}, + {token.IDENT, "_abc123"}, + {token.IDENT, "abc123_"}, + {token.IDENT, "_abc_123_"}, + {token.IDENT, "_äöü"}, + {token.IDENT, "_本"}, + {token.IDENT, "äöü"}, + {token.IDENT, "本"}, + {token.IDENT, "a۰۱۸"}, + {token.IDENT, "foo६४"}, + {token.IDENT, "bar9876"}, + }, + "string": []tokenPair{ + {token.STRING, `" "`}, + {token.STRING, `"a"`}, + {token.STRING, `"本"`}, + {token.STRING, `"\a"`}, + {token.STRING, `"\b"`}, + {token.STRING, `"\f"`}, + {token.STRING, `"\n"`}, + {token.STRING, `"\r"`}, + {token.STRING, `"\t"`}, + {token.STRING, `"\v"`}, + {token.STRING, `"\""`}, + {token.STRING, `"\000"`}, + {token.STRING, `"\777"`}, + {token.STRING, `"\x00"`}, + {token.STRING, `"\xff"`}, + {token.STRING, `"\u0000"`}, + {token.STRING, `"\ufA16"`}, + {token.STRING, `"\U00000000"`}, + {token.STRING, `"\U0000ffAB"`}, + {token.STRING, `"` + f100 + `"`}, + }, + "number": []tokenPair{ + {token.NUMBER, "0"}, + {token.NUMBER, "1"}, + {token.NUMBER, "9"}, + {token.NUMBER, "42"}, + {token.NUMBER, "1234567890"}, + {token.NUMBER, "00"}, + {token.NUMBER, "01"}, + {token.NUMBER, "07"}, + {token.NUMBER, "042"}, + {token.NUMBER, "01234567"}, + {token.NUMBER, "0x0"}, + {token.NUMBER, "0x1"}, + {token.NUMBER, "0xf"}, + {token.NUMBER, "0x42"}, + {token.NUMBER, "0x123456789abcDEF"}, + {token.NUMBER, "0x" + f100}, + {token.NUMBER, "0X0"}, + {token.NUMBER, "0X1"}, + {token.NUMBER, "0XF"}, + {token.NUMBER, "0X42"}, + {token.NUMBER, "0X123456789abcDEF"}, + {token.NUMBER, "0X" + f100}, + {token.NUMBER, "0e0"}, + {token.NUMBER, "1e0"}, + {token.NUMBER, "42e0"}, + {token.NUMBER, "01234567890e0"}, + {token.NUMBER, "0E0"}, + {token.NUMBER, "1E0"}, + {token.NUMBER, "42E0"}, + {token.NUMBER, "01234567890E0"}, + {token.NUMBER, "0e+10"}, + {token.NUMBER, "1e-10"}, + {token.NUMBER, "42e+10"}, + {token.NUMBER, "01234567890e-10"}, + {token.NUMBER, "0E+10"}, + {token.NUMBER, "1E-10"}, + {token.NUMBER, "42E+10"}, + {token.NUMBER, "01234567890E-10"}, + {token.NUMBER, "-0"}, + {token.NUMBER, "-1"}, + {token.NUMBER, "-9"}, + {token.NUMBER, "-42"}, + {token.NUMBER, "-1234567890"}, + {token.NUMBER, "-00"}, + {token.NUMBER, "-01"}, + {token.NUMBER, "-07"}, + {token.NUMBER, "-042"}, + {token.NUMBER, "-01234567"}, + {token.NUMBER, "-0x0"}, + {token.NUMBER, "-0x1"}, + {token.NUMBER, "-0xf"}, + {token.NUMBER, "-0x42"}, + {token.NUMBER, "-0x123456789abcDEF"}, + {token.NUMBER, "-0x" + f100}, + {token.NUMBER, "-0X0"}, + {token.NUMBER, "-0X1"}, + {token.NUMBER, "-0XF"}, + {token.NUMBER, "-0X42"}, + {token.NUMBER, "-0X123456789abcDEF"}, + {token.NUMBER, "-0X" + f100}, + {token.NUMBER, "-0e0"}, + {token.NUMBER, "-1e0"}, + {token.NUMBER, "-42e0"}, + {token.NUMBER, "-01234567890e0"}, + {token.NUMBER, "-0E0"}, + {token.NUMBER, "-1E0"}, + {token.NUMBER, "-42E0"}, + {token.NUMBER, "-01234567890E0"}, + {token.NUMBER, "-0e+10"}, + {token.NUMBER, "-1e-10"}, + {token.NUMBER, "-42e+10"}, + {token.NUMBER, "-01234567890e-10"}, + {token.NUMBER, "-0E+10"}, + {token.NUMBER, "-1E-10"}, + {token.NUMBER, "-42E+10"}, + {token.NUMBER, "-01234567890E-10"}, + }, + "float": []tokenPair{ + {token.FLOAT, "0."}, + {token.FLOAT, "1."}, + {token.FLOAT, "42."}, + {token.FLOAT, "01234567890."}, + {token.FLOAT, ".0"}, + {token.FLOAT, ".1"}, + {token.FLOAT, ".42"}, + {token.FLOAT, ".0123456789"}, + {token.FLOAT, "0.0"}, + {token.FLOAT, "1.0"}, + {token.FLOAT, "42.0"}, + {token.FLOAT, "01234567890.0"}, + {token.FLOAT, "01.8e0"}, + {token.FLOAT, "1.4e0"}, + {token.FLOAT, "42.2e0"}, + {token.FLOAT, "01234567890.12e0"}, + {token.FLOAT, "0.E0"}, + {token.FLOAT, "1.12E0"}, + {token.FLOAT, "42.123E0"}, + {token.FLOAT, "01234567890.213E0"}, + {token.FLOAT, "0.2e+10"}, + {token.FLOAT, "1.2e-10"}, + {token.FLOAT, "42.54e+10"}, + {token.FLOAT, "01234567890.98e-10"}, + {token.FLOAT, "0.1E+10"}, + {token.FLOAT, "1.1E-10"}, + {token.FLOAT, "42.1E+10"}, + {token.FLOAT, "01234567890.1E-10"}, + {token.FLOAT, "-0.0"}, + {token.FLOAT, "-1.0"}, + {token.FLOAT, "-42.0"}, + {token.FLOAT, "-01234567890.0"}, + {token.FLOAT, "-01.8e0"}, + {token.FLOAT, "-1.4e0"}, + {token.FLOAT, "-42.2e0"}, + {token.FLOAT, "-01234567890.12e0"}, + {token.FLOAT, "-0.E0"}, + {token.FLOAT, "-1.12E0"}, + {token.FLOAT, "-42.123E0"}, + {token.FLOAT, "-01234567890.213E0"}, + {token.FLOAT, "-0.2e+10"}, + {token.FLOAT, "-1.2e-10"}, + {token.FLOAT, "-42.54e+10"}, + {token.FLOAT, "-01234567890.98e-10"}, + {token.FLOAT, "-0.1E+10"}, + {token.FLOAT, "-1.1E-10"}, + {token.FLOAT, "-42.1E+10"}, + {token.FLOAT, "-01234567890.1E-10"}, + }, +} + +var orderedTokenLists = []string{ + "comment", + "operator", + "bool", + "ident", + "string", + "number", + "float", +} + +func TestPosition(t *testing.T) { + // create artifical source code + buf := new(bytes.Buffer) + + for _, listName := range orderedTokenLists { + for _, ident := range tokenLists[listName] { + fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text) + } + } + + s := New(buf.Bytes()) + + pos := token.Pos{"", 4, 1, 5} + s.Scan() + for _, listName := range orderedTokenLists { + + for _, k := range tokenLists[listName] { + curPos := s.tokPos + // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) + + if curPos.Offset != pos.Offset { + t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) + } + if curPos.Line != pos.Line { + t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) + } + if curPos.Column != pos.Column { + t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) + } + pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline + pos.Line += countNewlines(k.text) + 1 // each token is on a new line + s.Scan() + } + } + // make sure there were no token-internal errors reported by scanner + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } +} + +func TestComment(t *testing.T) { + testTokenList(t, tokenLists["comment"]) +} + +func TestOperator(t *testing.T) { + testTokenList(t, tokenLists["operator"]) +} + +func TestBool(t *testing.T) { + testTokenList(t, tokenLists["bool"]) +} + +func TestIdent(t *testing.T) { + testTokenList(t, tokenLists["ident"]) +} + +func TestString(t *testing.T) { + testTokenList(t, tokenLists["string"]) +} + +func TestNumber(t *testing.T) { + testTokenList(t, tokenLists["number"]) +} + +func TestFloat(t *testing.T) { + testTokenList(t, tokenLists["float"]) +} + +func TestRealExample(t *testing.T) { + complexHCL := `// This comes from Terraform, as a test + variable "foo" { + default = "bar" + description = "bar" + } + + provider "aws" { + access_key = "foo" + secret_key = "bar" + } + + resource "aws_security_group" "firewall" { + count = 5 + } + + resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } + }` + + literals := []struct { + tokenType token.Type + literal string + }{ + {token.COMMENT, `// This comes from Terraform, as a test`}, + {token.IDENT, `variable`}, + {token.STRING, `"foo"`}, + {token.LBRACE, `{`}, + {token.IDENT, `default`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.IDENT, `description`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.RBRACE, `}`}, + {token.IDENT, `provider`}, + {token.STRING, `"aws"`}, + {token.LBRACE, `{`}, + {token.IDENT, `access_key`}, + {token.ASSIGN, `=`}, + {token.STRING, `"foo"`}, + {token.IDENT, `secret_key`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.RBRACE, `}`}, + {token.IDENT, `resource`}, + {token.STRING, `"aws_security_group"`}, + {token.STRING, `"firewall"`}, + {token.LBRACE, `{`}, + {token.IDENT, `count`}, + {token.ASSIGN, `=`}, + {token.NUMBER, `5`}, + {token.RBRACE, `}`}, + {token.IDENT, `resource`}, + {token.IDENT, `aws_instance`}, + {token.STRING, `"web"`}, + {token.LBRACE, `{`}, + {token.IDENT, `ami`}, + {token.ASSIGN, `=`}, + {token.STRING, `"${var.foo}"`}, + {token.IDENT, `security_groups`}, + {token.ASSIGN, `=`}, + {token.LBRACK, `[`}, + {token.STRING, `"foo"`}, + {token.COMMA, `,`}, + {token.STRING, `"${aws_security_group.firewall.foo}"`}, + {token.RBRACK, `]`}, + {token.IDENT, `network_interface`}, + {token.LBRACE, `{`}, + {token.IDENT, `device_index`}, + {token.ASSIGN, `=`}, + {token.NUMBER, `0`}, + {token.IDENT, `description`}, + {token.ASSIGN, `=`}, + {token.STRING, `"Main network interface"`}, + {token.RBRACE, `}`}, + {token.RBRACE, `}`}, + {token.EOF, ``}, + } + + s := New([]byte(complexHCL)) + for _, l := range literals { + tok := s.Scan() + if l.tokenType != tok.Type { + t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String()) + } + + if l.literal != tok.Text { + t.Errorf("got: %s want %s\n", tok, l.literal) + } + } + +} + +func TestError(t *testing.T) { + testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) + testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) + + testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT) + testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT) + + testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) + testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) + + testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER) + testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER) + testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL) + + testError(t, `"`, "1:2", "literal not terminated", token.STRING) + testError(t, `"abc`, "1:5", "literal not terminated", token.STRING) + testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING) + testError(t, `/*/`, "1:4", "comment not terminated", token.COMMENT) +} + +func testError(t *testing.T, src, pos, msg string, tok token.Type) { + s := New([]byte(src)) + + errorCalled := false + s.Error = func(p token.Pos, m string) { + if !errorCalled { + if pos != p.String() { + t.Errorf("pos = %q, want %q for %q", p, pos, src) + } + + if m != msg { + t.Errorf("msg = %q, want %q for %q", m, msg, src) + } + errorCalled = true + } + } + + tk := s.Scan() + if tk.Type != tok { + t.Errorf("tok = %s, want %s for %q", tk, tok, src) + } + if !errorCalled { + t.Errorf("error handler not called for %q", src) + } + if s.ErrorCount == 0 { + t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src) + } +} + +func testTokenList(t *testing.T, tokenList []tokenPair) { + // create artifical source code + buf := new(bytes.Buffer) + for _, ident := range tokenList { + fmt.Fprintf(buf, "%s\n", ident.text) + } + + s := New(buf.Bytes()) + for _, ident := range tokenList { + tok := s.Scan() + if tok.Type != ident.tok { + t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) + } + + if tok.Text != ident.text { + t.Errorf("text = %q want %q", tok.String(), ident.text) + } + + } +} + +func countNewlines(s string) int { + n := 0 + for _, ch := range s { + if ch == '\n' { + n++ + } + } + return n +} diff --git a/hcl/token/position.go b/hcl/token/position.go new file mode 100644 index 0000000..59c1bb7 --- /dev/null +++ b/hcl/token/position.go @@ -0,0 +1,46 @@ +package token + +import "fmt" + +// Pos describes an arbitrary source position +// including the file, line, and column location. +// A Position is valid if the line number is > 0. +type Pos struct { + Filename string // filename, if any + Offset int // offset, starting at 0 + Line int // line number, starting at 1 + Column int // column number, starting at 1 (character count) +} + +// IsValid returns true if the position is valid. +func (p *Pos) IsValid() bool { return p.Line > 0 } + +// String returns a string in one of several forms: +// +// file:line:column valid position with file name +// line:column valid position without file name +// file invalid position with file name +// - invalid position without file name +func (p Pos) String() string { + s := p.Filename + if p.IsValid() { + if s != "" { + s += ":" + } + s += fmt.Sprintf("%d:%d", p.Line, p.Column) + } + if s == "" { + s = "-" + } + return s +} + +// Before reports whether the position p is before u. +func (p Pos) Before(u Pos) bool { + return u.Offset > p.Offset || u.Line > p.Line +} + +// After reports whether the position p is after u. +func (p Pos) After(u Pos) bool { + return u.Offset < p.Offset || u.Line < p.Line +} diff --git a/hcl/token/token.go b/hcl/token/token.go new file mode 100644 index 0000000..3b7581a --- /dev/null +++ b/hcl/token/token.go @@ -0,0 +1,105 @@ +// Package token defines constants representing the lexical tokens for HCL +// (HashiCorp Configuration Language) +package token + +import ( + "fmt" + "strconv" +) + +// Token defines a single HCL token which can be obtained via the Scanner +type Token struct { + Type Type + Pos Pos + Text string +} + +// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language) +type Type int + +const ( + // Special tokens + ILLEGAL Type = iota + EOF + COMMENT + + identifier_beg + IDENT // literals + literal_beg + NUMBER // 12345 + FLOAT // 123.45 + BOOL // true,false + STRING // "abc" + literal_end + identifier_end + + operator_beg + LBRACK // [ + LBRACE // { + COMMA // , + PERIOD // . + + RBRACK // ] + RBRACE // } + + ASSIGN // = + ADD // + + SUB // - + operator_end +) + +var tokens = [...]string{ + ILLEGAL: "ILLEGAL", + + EOF: "EOF", + COMMENT: "COMMENT", + + IDENT: "IDENT", + NUMBER: "NUMBER", + FLOAT: "FLOAT", + BOOL: "BOOL", + STRING: "STRING", + + LBRACK: "LBRACK", + LBRACE: "LBRACE", + COMMA: "COMMA", + PERIOD: "PERIOD", + + RBRACK: "RBRACK", + RBRACE: "RBRACE", + + ASSIGN: "ASSIGN", + ADD: "ADD", + SUB: "SUB", +} + +// String returns the string corresponding to the token tok. +func (t Type) String() string { + s := "" + if 0 <= t && t < Type(len(tokens)) { + s = tokens[t] + } + if s == "" { + s = "token(" + strconv.Itoa(int(t)) + ")" + } + return s +} + +// IsIdentifier returns true for tokens corresponding to identifiers and basic +// type literals; it returns false otherwise. +func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end } + +// IsLiteral returns true for tokens corresponding to basic type literals; it +// returns false otherwise. +func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end } + +// IsOperator returns true for tokens corresponding to operators and +// delimiters; it returns false otherwise. +func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end } + +// String returns the token's literal text. Note that this is only +// applicable for certain token types, such as token.IDENT, +// token.STRING, etc.. +func (t Token) String() string { + return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text) +} diff --git a/hcl/token/token_test.go b/hcl/token/token_test.go new file mode 100644 index 0000000..796945c --- /dev/null +++ b/hcl/token/token_test.go @@ -0,0 +1,36 @@ +package token + +import "testing" + +func TestTypeString(t *testing.T) { + var tokens = []struct { + tt Type + str string + }{ + {ILLEGAL, "ILLEGAL"}, + {EOF, "EOF"}, + {COMMENT, "COMMENT"}, + {IDENT, "IDENT"}, + {NUMBER, "NUMBER"}, + {FLOAT, "FLOAT"}, + {BOOL, "BOOL"}, + {STRING, "STRING"}, + {LBRACK, "LBRACK"}, + {LBRACE, "LBRACE"}, + {COMMA, "COMMA"}, + {PERIOD, "PERIOD"}, + {RBRACK, "RBRACK"}, + {RBRACE, "RBRACE"}, + {ASSIGN, "ASSIGN"}, + {ADD, "ADD"}, + {SUB, "SUB"}, + } + + for _, token := range tokens { + if token.tt.String() != token.str { + t.Errorf("want: %q got:%q\n", token.str, token.tt) + + } + } + +} diff --git a/hclnew/LICENSE b/hclnew/LICENSE deleted file mode 100644 index 2dd5110..0000000 --- a/hclnew/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2015, Fatih Arslan -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of hcl nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/hclnew/README.md b/hclnew/README.md deleted file mode 100644 index 860681f..0000000 --- a/hclnew/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# hcl [![GoDoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://godoc.org/github.com/fatih/hcl) [![Build Status](http://img.shields.io/travis/fatih/hcl.svg?style=flat-square)](https://travis-ci.org/fatih/hcl) - -HCL is a lexer and parser family written in Go for -[HCL](https://github.com/hashicorp/hcl) (Hashicorp Configuration Language). It -has several components, similar to Go's own parser family. It provides a set of -packages to write tools and customize files written in HCL. For example both -[`hclfmt`](https://github.com/fatih/hclfmt) and `hcl2json` (coming soon) is -written based on these tools. - -This package is still under heavy development. The next stable version will be -released with version 0.1. - -## API - -If you are already familiar with Go's own parser family it's really easy to -dive. It basically resembles the same logic. However there are several differences -and the implementation is completely different. Right now it contains the -following packages: - -* `token`: defines constants representing the lexical tokens for a scanned HCL file. -* `scanner`: scanner is a lexical scanner. It scans a given HCL file and - returns a stream of tokens. -* `ast`: declares the types used to represent the syntax tree for parsed HCL files. -* `parser`: parses a given HCL file and creates a AST representation -* `printer`: prints any given AST node and formats - -## Why - -The whole parser family was created because I wanted a `hclfmt` command. This -command would be just like `gofmt`, format an HCL file. I didn't want to use -the package [github/hashicorp/hcl](https://github.com/hashicorp/hcl) in the -first place, because the lexer and parser is generated and it doesn't expose -the flexibility I wanted to have. - -Another reason was that I wanted to learn and experience how to implement a -proper lexer and parser in Go and how a formatter could be implemented from an -AST. It was really fun and I think it was worth it. - -## License - -The BSD 3-Clause License - see -[`LICENSE`](https://github.com/fatih/hcl/blob/master/LICENSE) for more -details - diff --git a/hclnew/ast/ast.go b/hclnew/ast/ast.go index c5a6f47..82a6b83 100644 --- a/hclnew/ast/ast.go +++ b/hclnew/ast/ast.go @@ -2,7 +2,9 @@ // (HashiCorp Configuration Language) package ast -import "github.com/fatih/hcl/token" +import ( + "github.com/hashicorp/hcl/hcl/token" +) // Node is an element in the abstract syntax tree. type Node interface { diff --git a/hclnew/parser/parser.go b/hclnew/parser/parser.go index 7381bd8..24e878d 100644 --- a/hclnew/parser/parser.go +++ b/hclnew/parser/parser.go @@ -6,9 +6,9 @@ import ( "errors" "fmt" - "github.com/fatih/hcl/ast" - "github.com/fatih/hcl/scanner" - "github.com/fatih/hcl/token" + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/hcl/hcl/scanner" + "github.com/hashicorp/hcl/hcl/token" ) type Parser struct {