From 3832ed0981898bd796891731bdf3f0a17f12b49c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Thu, 15 Oct 2015 01:27:35 +0300 Subject: [PATCH] parser: improve node parsing, remove string() and many other small fixes --- parser/ast.go | 58 ++++------------------------------------- parser/parser.go | 60 +++++++++++++++++++++---------------------- parser/parser_test.go | 5 ---- scanner/token.go | 10 ++++++-- 4 files changed, 43 insertions(+), 90 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index eec25cc..8170115 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -5,7 +5,6 @@ import "github.com/fatih/hcl/scanner" // Node is an element in the abstract syntax tree. type Node interface { node() - String() string Pos() scanner.Pos } @@ -28,15 +27,6 @@ func (s *Source) add(node Node) { s.nodes = append(s.nodes, node) } -func (s *Source) String() string { - buf := "" - for _, n := range s.nodes { - buf += n.String() - } - - return buf -} - func (s *Source) Pos() scanner.Pos { // always returns the uninitiliazed position return s.nodes[0].Pos() @@ -47,10 +37,6 @@ type Ident struct { token scanner.Token } -func (i *Ident) String() string { - return i.token.Text -} - func (i *Ident) Pos() scanner.Pos { return i.token.Pos } @@ -62,10 +48,6 @@ type AssignStatement struct { assign scanner.Pos // position of "=" } -func (a *AssignStatement) String() string { - return a.lhs.String() + " = " + a.rhs.String() -} - func (a *AssignStatement) Pos() scanner.Pos { return a.lhs.Pos() } @@ -76,20 +58,6 @@ type ObjectStatement struct { ObjectType } -func (o *ObjectStatement) String() string { - s := "" - - for i, n := range o.Idents { - s += n.String() - if i != len(o.Idents) { - s += " " - } - } - - s += o.ObjectType.String() - return s -} - func (o *ObjectStatement) Pos() scanner.Pos { return o.Idents[0].Pos() } @@ -97,7 +65,7 @@ func (o *ObjectStatement) Pos() scanner.Pos { // LiteralType represents a literal of basic type. 
Valid types are: // scanner.NUMBER, scanner.FLOAT, scanner.BOOL and scanner.STRING type LiteralType struct { - *Ident + token scanner.Token } // isValid() returns true if the underlying identifier satisfies one of the @@ -111,6 +79,10 @@ func (l *LiteralType) isValid() bool { } } +func (l *LiteralType) Pos() scanner.Pos { + return l.token.Pos +} + // ListStatement represents a HCL List type type ListType struct { lbrack scanner.Pos // position of "[" @@ -118,16 +90,6 @@ type ListType struct { list []Node // the elements in lexical order } -func (l *ListType) String() string { - s := "[\n" - for _, n := range l.list { - s += n.String() + ",\n" - } - - s += "]" - return s -} - func (l *ListType) Pos() scanner.Pos { return l.lbrack } @@ -139,16 +101,6 @@ type ObjectType struct { list []Node // the nodes in lexical order } -func (b *ObjectType) String() string { - s := "{\n" - for _, n := range b.list { - s += n.String() + "\n" - } - - s += "}" - return s -} - func (b *ObjectType) Pos() scanner.Pos { return b.lbrace } diff --git a/parser/parser.go b/parser/parser.go index 5eb03da..e7fe9c1 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -24,6 +24,8 @@ func New(src []byte) *Parser { } } +var errEofToken = errors.New("EOF token found") + // Parse returns the fully parsed source and returns the abstract syntax tree. 
func (p *Parser) Parse() (Node, error) { defer un(trace(p, "ParseSource")) @@ -31,16 +33,15 @@ func (p *Parser) Parse() (Node, error) { for { n, err := p.parseNode() + if err == errEofToken { + break // we are finished + } if err != nil { return nil, err } + // we successfully parsed a node, add it to the final source node node.add(n) - - // break if we hit the end - if p.tok.Type == scanner.EOF { - break - } } return node, nil @@ -50,24 +51,33 @@ func (p *Parser) parseNode() (Node, error) { defer un(trace(p, "ParseNode")) tok := p.scan() - fmt.Println(tok) // debug - if tok.Type.IsLiteral() { - if p.prevTok.Type.IsLiteral() { + switch tok.Type { + case scanner.ASSIGN: + return p.parseAssignment() + case scanner.LBRACK: + // return p.parseListType() + case scanner.LBRACE: + // return p.parseObjectType() + case scanner.COMMENT: + // implement comment + case scanner.EOF: + return nil, errEofToken + } + + if tok.Type.IsIdentifier() { + if p.prevTok.Type.IsIdentifier() { return p.parseObjectStatement() } - tok := p.scan() - if tok.Type == scanner.ASSIGN { - return p.parseAssignment() + if tok.Type.IsLiteral() { + return p.parseLiteralType() } - - p.unscan() return p.parseIdent() } - return nil, errors.New("not yet implemented") + return nil, fmt.Errorf("not yet implemented: %s", tok.Type) } // parseAssignment parses an assignment and returns a AssignStatement AST @@ -93,10 +103,6 @@ func (p *Parser) parseAssignment() (*AssignStatement, error) { func (p *Parser) parseIdent() (*Ident, error) { defer un(trace(p, "ParseIdent")) - if !p.tok.Type.IsLiteral() { - return nil, errors.New("can't parse non literal token") - } - return &Ident{ token: p.tok, }, nil @@ -104,24 +110,18 @@ func (p *Parser) parseIdent() (*Ident, error) { // parseLiteralType parses a literal type and returns a LiteralType AST func (p *Parser) parseLiteralType() (*LiteralType, error) { - i, err := p.parseIdent() - if err != nil { - return nil, err - } + defer un(trace(p, "ParseLiteral")) - l := 
&LiteralType{} - l.Ident = i - - if !l.isValid() { - return nil, fmt.Errorf("Identifier is not a LiteralType: %s", l.token) - } - - return l, nil + return &LiteralType{ + token: p.tok, + }, nil } // parseObjectStatement parses an object statement returns an ObjectStatement // AST. ObjectsStatements represents both normal and nested objects statement func (p *Parser) parseObjectStatement() (*ObjectStatement, error) { + defer un(trace(p, "ParseObjectStatement")) + return nil, errors.New("ObjectStatement is not implemented yet") } diff --git a/parser/parser_test.go b/parser/parser_test.go index 054473a..63004f5 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -11,12 +11,7 @@ func TestAssignStatement(t *testing.T) { t.Fatal(err) } - if n.String() != src { - t.Errorf("AssignStatement is not parsed correctly\n\twant: '%s'\n\tgot : '%s'", src, n.String()) - } - if n.Pos().Line != 1 { t.Errorf("AssignStatement position is wrong\n\twant: '%d'\n\tgot : '%d'", 1, n.Pos().Line) } - } diff --git a/scanner/token.go b/scanner/token.go index 6c62fa8..deb6f9a 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -21,13 +21,15 @@ const ( EOF COMMENT + identifier_beg + IDENT // literals literal_beg - IDENT // literals NUMBER // 12345 FLOAT // 123.45 BOOL // true,false STRING // "abc" literal_end + identifier_end operator_beg LBRACK // [ @@ -81,8 +83,12 @@ func (t TokenType) String() string { return s } -// IsLiteral returns true for tokens corresponding to identifiers and basic +// IsIdentifier returns true for tokens corresponding to identifiers and basic // type literals; it returns false otherwise. +func (t TokenType) IsIdentifier() bool { return identifier_beg < t && t < identifier_end } + +// IsLiteral returns true for tokens corresponding to basic type literals; it +// returns false otherwise. func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end } // IsOperator returns true for tokens corresponding to operators and