From f3dba35accc55f1a291e2d72d93222df3c4af717 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 02:27:43 +0300 Subject: [PATCH] parser: change how we obtain tokens --- parser/ast.go | 28 ++++++------ parser/parser.go | 99 +++++++++++++++++++++++++++++++---------- parser/parser_test.go | 17 +++++++ scanner/scanner.go | 6 +-- scanner/scanner_test.go | 12 ++--- scanner/token.go | 18 ++------ 6 files changed, 120 insertions(+), 60 deletions(-) create mode 100644 parser/parser_test.go diff --git a/parser/ast.go b/parser/ast.go index 70c7dc6..a66c587 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -33,11 +33,11 @@ type Source struct { nodes []Node } -func (s Source) add(node Node) { +func (s *Source) add(node Node) { s.nodes = append(s.nodes, node) } -func (s Source) String() string { +func (s *Source) String() string { buf := "" for _, n := range s.nodes { buf += n.String() @@ -46,7 +46,7 @@ func (s Source) String() string { return buf } -func (s Source) Pos() scanner.Pos { +func (s *Source) Pos() scanner.Pos { // always returns the uninitiliazed position return scanner.Pos{} } @@ -56,12 +56,12 @@ type IdentStatement struct { token scanner.Token } -func (i IdentStatement) String() string { +func (i *IdentStatement) String() string { return i.token.String() } -func (i IdentStatement) Pos() scanner.Pos { - return i.token.Pos() +func (i *IdentStatement) Pos() scanner.Pos { + return i.token.Pos } type BlockStatement struct { @@ -70,7 +70,7 @@ type BlockStatement struct { list []Node // the nodes in lexical order } -func (b BlockStatement) String() string { +func (b *BlockStatement) String() string { s := "{\n" for _, n := range b.list { s += n.String() + "\n" @@ -80,7 +80,7 @@ func (b BlockStatement) String() string { return s } -func (b BlockStatement) Pos() scanner.Pos { +func (b *BlockStatement) Pos() scanner.Pos { return b.lbrace } @@ -91,11 +91,11 @@ type AssignStatement struct { assign scanner.Pos // position of "=" } -func (a AssignStatement) String() string { +func (a *AssignStatement) String() string { return a.lhs.String() + " = " + a.rhs.String() } -func (a AssignStatement) Pos() scanner.Pos { +func (a *AssignStatement) Pos() scanner.Pos { return a.lhs.Pos() } @@ -106,7 +106,7 @@ type ListStatement struct { list []Node // the elements in lexical order } -func (l ListStatement) String() string { +func (l *ListStatement) String() string { s := "[\n" for _, n := range l.list { s += n.String() + ",\n" @@ -116,7 +116,7 @@ func (l ListStatement) String() string { return s } -func (l ListStatement) Pos() scanner.Pos { +func (l *ListStatement) Pos() scanner.Pos { return l.lbrack } @@ -126,7 +126,7 @@ type ObjectStatement struct { BlockStatement } -func (o ObjectStatement) String() string { +func (o *ObjectStatement) String() string { s := "" for i, n := range o.Idents { @@ -140,6 +140,6 @@ func (o ObjectStatement) String() string { return s } -func (o ObjectStatement) Pos() scanner.Pos { +func (o *ObjectStatement) Pos() scanner.Pos { return o.Idents[0].Pos() } diff --git a/parser/parser.go b/parser/parser.go index 695cee6..ee02e8a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1,6 +1,10 @@ package parser -import "github.com/fatih/hcl/scanner" +import ( + "fmt" + + "github.com/fatih/hcl/scanner" +) type Parser struct { sc *scanner.Scanner @@ -8,10 +12,12 @@ type Parser struct { tok scanner.Token // last read token prevTok scanner.Token // previous read token - n int // buffer size (max = 1) + enableTrace bool + indent int + n int // buffer size (max = 1) } -func NewParser(src []byte) *Parser { +func New(src []byte) *Parser { return &Parser{ sc: scanner.New(src), } @@ -19,28 +25,37 @@ func NewParser(src []byte) *Parser { // Parse returns the fully parsed source and returns the abstract syntax tree. func (p *Parser) Parse() Node { - tok := p.scan() + defer un(trace(p, "ParseSource")) + node := &Source{} - node := Source{} + for { + // break if we hit the end + if p.tok.Type == scanner.EOF { + break + } - switch tok.Type() { - case scanner.IDENT: - n := p.parseStatement() - node.add(n) - case scanner.EOF: + if n := p.parseStatement(); n != nil { + node.add(n) + } } return node } func (p *Parser) parseStatement() Node { + defer un(trace(p, "ParseStatement")) + tok := p.scan() - if tok.Type().IsLiteral() { + if tok.Type.IsLiteral() { + // found an object + if p.prevTok.Type.IsLiteral() { + return p.parseObject() + } return p.parseIdent() } - switch tok.Type() { + switch tok.Type { case scanner.LBRACE: return p.parseObject() case scanner.LBRACK: @@ -48,11 +63,25 @@ func (p *Parser) parseStatement() Node { case scanner.ASSIGN: return p.parseAssignment() } + return nil } +func (p *Parser) parseAssignment() Node { + defer un(trace(p, "ParseAssignment")) + return &AssignStatement{ + lhs: &IdentStatement{ + token: p.prevTok, + }, + assign: p.tok.Pos, + rhs: p.parseStatement(), + } +} + func (p *Parser) parseIdent() Node { - return IdentStatement{ + defer un(trace(p, "ParseIdent")) + + return &IdentStatement{ token: p.tok, } } @@ -65,16 +94,6 @@ func (p *Parser) parseList() Node { return nil } -func (p *Parser) parseAssignment() Node { - return AssignStatement{ - lhs: IdentStatement{ - token: p.prevTok, - }, - assign: p.tok.Pos(), - rhs: p.parseStatement(), - } -} - // scan returns the next token from the underlying scanner. // If a token has been unscanned then read that instead. func (p *Parser) scan() scanner.Token { @@ -95,3 +114,37 @@ func (p *Parser) scan() scanner.Token { // unscan pushes the previously read token back onto the buffer. func (p *Parser) unscan() { p.n = 1 } + +// ---------------------------------------------------------------------------- +// Parsing support + +func (p *Parser) printTrace(a ...interface{}) { + if !p.enableTrace { + return + } + + const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + const n = len(dots) + fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column) + + i := 2 * p.indent + for i > n { + fmt.Print(dots) + i -= n + } + // i <= n + fmt.Print(dots[0:i]) + fmt.Println(a...) +} + +func trace(p *Parser, msg string) *Parser { + p.printTrace(msg, "(") + p.indent++ + return p +} + +// Usage pattern: defer un(trace(p, "...")) +func un(p *Parser) { + p.indent-- + p.printTrace(")") +} diff --git a/parser/parser_test.go b/parser/parser_test.go new file mode 100644 index 0000000..e8f07eb --- /dev/null +++ b/parser/parser_test.go @@ -0,0 +1,17 @@ +package parser + +import ( + "fmt" + "testing" +) + +func TestAssignStatment(t *testing.T) { + src := `ami = "${var.foo}"` + + p := New([]byte(src)) + p.enableTrace = true + n := p.Parse() + + fmt.Println(n) + +} diff --git a/scanner/scanner.go b/scanner/scanner.go index e127240..42234a2 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -204,9 +204,9 @@ func (s *Scanner) Scan() Token { s.tokStart = s.tokEnd // ensure idempotency of tokenText() call return Token{ - token: tok, - pos: s.tokPos, - text: tokenText, + Type: tok, + Pos: s.tokPos, + Text: tokenText, } } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 5918ef2..0556766 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -270,8 +270,8 @@ func TestRealExample(t *testing.T) { }` literals := []struct { - token TokenType - literal string + tokenType TokenType + literal string }{ {COMMENT, `// This comes from Terraform, as a test`}, {IDENT, `variable`}, @@ -332,8 +332,8 @@ func TestRealExample(t *testing.T) { s := New([]byte(complexHCL)) for _, l := range literals { tok := s.Scan() - if l.token != tok.Type() { - t.Errorf("got: %s want %s for %s\n", tok, l.token, tok.String()) + if l.tokenType != tok.Type { + t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String()) } if l.literal != tok.String() { @@ -383,7 +383,7 @@ func testError(t *testing.T, src, pos, msg string, tok TokenType) { } tk := s.Scan() - if tk.Type() != tok { + if tk.Type != tok { t.Errorf("tok = %s, want %s for %q", tk, tok, src) } if !errorCalled { @@ -404,7 +404,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { s := New(buf.Bytes()) for _, ident := range tokenList { tok := s.Scan() - if tok.Type() != ident.tok { + if tok.Type != ident.tok { t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) } diff --git a/scanner/token.go b/scanner/token.go index 1891130..30b215d 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -4,9 +4,9 @@ import "strconv" // Token defines a single HCL token which can be obtained via the Scanner type Token struct { - token TokenType - pos Pos - text string + Type TokenType + Pos Pos + Text string } // TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language) @@ -86,19 +86,9 @@ func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end // delimiters; it returns false otherwise. func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end } -// Type returns the token's type -func (t Token) Type() TokenType { - return t.token -} - -// Pos returns the token's position -func (t Token) Pos() Pos { - return t.pos -} - // String returns the token's literal text. Note that this is only // applicable for certain token types, such as token.IDENT, // token.STRING, etc.. func (t Token) String() string { - return t.text + return t.Text }