parser: change how we obtain tokens

2015-10-12 02:27:43 +03:00 · 2015-10-12 02:27:43 +03:00 · f3dba35acc
commit f3dba35acc
parent 99099cda62
6 changed files with 120 additions and 60 deletions
--- a/parser/ast.go
+++ b/parser/ast.go
@ -33,11 +33,11 @@ type Source struct {
 	nodes []Node
 }

-func (s Source) add(node Node) {
+// add appends node to the source's node list. With the pointer receiver the
+// assignment to s.nodes updates the caller's Source; with the previous value
+// receiver it only updated a copy.
+func (s *Source) add(node Node) {
 	s.nodes = append(s.nodes, node)
 }

-func (s Source) String() string {
+func (s *Source) String() string {
 	buf := ""
 	for _, n := range s.nodes {
 		buf += n.String()
@ -46,7 +46,7 @@ func (s Source) String() string {
 	return buf
 }

-func (s Source) Pos() scanner.Pos {
+func (s *Source) Pos() scanner.Pos {
 	// always returns the uninitiliazed position
 	return scanner.Pos{}
 }
@ -56,12 +56,12 @@ type IdentStatement struct {
 	token scanner.Token
 }

-func (i IdentStatement) String() string {
+func (i *IdentStatement) String() string {
 	return i.token.String()
 }

-func (i IdentStatement) Pos() scanner.Pos {
-	return i.token.Pos()
+// Pos returns the position stored in the identifier's token. Pos is read as a
+// field of scanner.Token here rather than called as a method.
+func (i *IdentStatement) Pos() scanner.Pos {
+	return i.token.Pos
 }

 type BlockStatement struct {
@ -70,7 +70,7 @@ type BlockStatement struct {
 	list   []Node      // the nodes in lexical order
 }

-func (b BlockStatement) String() string {
+func (b *BlockStatement) String() string {
 	s := "{\n"
 	for _, n := range b.list {
 		s += n.String() + "\n"
@ -80,7 +80,7 @@ func (b BlockStatement) String() string {
 	return s
 }

-func (b BlockStatement) Pos() scanner.Pos {
+func (b *BlockStatement) Pos() scanner.Pos {
 	return b.lbrace
 }

@ -91,11 +91,11 @@ type AssignStatement struct {
 	assign scanner.Pos // position of "="
 }

-func (a AssignStatement) String() string {
+func (a *AssignStatement) String() string {
 	return a.lhs.String() + " = " + a.rhs.String()
 }

-func (a AssignStatement) Pos() scanner.Pos {
+func (a *AssignStatement) Pos() scanner.Pos {
 	return a.lhs.Pos()
 }

@ -106,7 +106,7 @@ type ListStatement struct {
 	list   []Node      // the elements in lexical order
 }

-func (l ListStatement) String() string {
+func (l *ListStatement) String() string {
 	s := "[\n"
 	for _, n := range l.list {
 		s += n.String() + ",\n"
@ -116,7 +116,7 @@ func (l ListStatement) String() string {
 	return s
 }

-func (l ListStatement) Pos() scanner.Pos {
+func (l *ListStatement) Pos() scanner.Pos {
 	return l.lbrack
 }

@ -126,7 +126,7 @@ type ObjectStatement struct {
 	BlockStatement
 }

-func (o ObjectStatement) String() string {
+func (o *ObjectStatement) String() string {
 	s := ""

 	for i, n := range o.Idents {
@ -140,6 +140,6 @@ func (o ObjectStatement) String() string {
 	return s
 }

-func (o ObjectStatement) Pos() scanner.Pos {
+// Pos returns the position of the first entry in o.Idents.
+// NOTE(review): the declaration of Idents is not visible here; if it is a
+// slice, Idents[0] panics when it is empty — confirm callers always set it.
+func (o *ObjectStatement) Pos() scanner.Pos {
 	return o.Idents[0].Pos()
 }
--- a/parser/parser.go
+++ b/parser/parser.go
@ -1,6 +1,10 @@
 package parser

-import "github.com/fatih/hcl/scanner"
+import (
+	"fmt"
+
+	"github.com/fatih/hcl/scanner"
+)

 type Parser struct {
 	sc *scanner.Scanner
@ -8,10 +12,12 @@ type Parser struct {
 	tok     scanner.Token // last read token
 	prevTok scanner.Token // previous read token

+	enableTrace bool
+	indent      int
 	n           int // buffer size (max = 1)
 }

-func NewParser(src []byte) *Parser {
+func New(src []byte) *Parser {
 	return &Parser{
 		sc: scanner.New(src),
 	}
@ -19,28 +25,37 @@ func NewParser(src []byte) *Parser {

 // Parse returns the fully parsed source and returns the abstract syntax tree.
 func (p *Parser) Parse() Node {
-	tok := p.scan()
+	defer un(trace(p, "ParseSource"))
+	// Source's methods use pointer receivers in this change, so a *Source is
+	// what gets built and returned as the Node.
+	node := &Source{}

-	node := Source{}
+	for {
+		// break if we hit the end
+		if p.tok.Type == scanner.EOF {
+			break
+		}

-	switch tok.Type() {
-	case scanner.IDENT:
-		n := p.parseStatement()
+		// parseStatement returns nil for tokens it has no case for; those
+		// are not added to the source.
+		if n := p.parseStatement(); n != nil {
 			node.add(n)
-	case scanner.EOF:
+		}
 	}

 	return node
 }

 func (p *Parser) parseStatement() Node {
+	defer un(trace(p, "ParseStatement"))
+
 	tok := p.scan()

-	if tok.Type().IsLiteral() {
+	if tok.Type.IsLiteral() {
+		// found an object
+		if p.prevTok.Type.IsLiteral() {
+			return p.parseObject()
+		}
 		return p.parseIdent()
 	}

-	switch tok.Type() {
+	switch tok.Type {
 	case scanner.LBRACE:
 		return p.parseObject()
 	case scanner.LBRACK:
@ -48,11 +63,25 @@ func (p *Parser) parseStatement() Node {
 	case scanner.ASSIGN:
 		return p.parseAssignment()
 	}
+
 	return nil
 }

+// parseAssignment builds an AssignStatement. The left-hand side is an
+// identifier wrapping the previously read token (p.prevTok), assign records
+// the position of the current token (p.tok), and the right-hand side is
+// whatever the next parseStatement call returns.
+// NOTE(review): parseStatement can return nil, in which case rhs is nil and
+// AssignStatement.String would fail on a.rhs.String() — confirm this is handled.
+func (p *Parser) parseAssignment() Node {
+	defer un(trace(p, "ParseAssignment"))
+	return &AssignStatement{
+		lhs: &IdentStatement{
+			token: p.prevTok,
+		},
+		assign: p.tok.Pos,
+		rhs:    p.parseStatement(),
+	}
+}
+
+// parseIdent wraps the current token (p.tok) in an IdentStatement and returns
+// a pointer to it.
 func (p *Parser) parseIdent() Node {
-	return IdentStatement{
+	defer un(trace(p, "ParseIdent"))
+
+	return &IdentStatement{
 		token: p.tok,
 	}
 }
@ -65,16 +94,6 @@ func (p *Parser) parseList() Node {
 	return nil
 }

-func (p *Parser) parseAssignment() Node {
-	return AssignStatement{
-		lhs: IdentStatement{
-			token: p.prevTok,
-		},
-		assign: p.tok.Pos(),
-		rhs:    p.parseStatement(),
-	}
-}
-
 // scan returns the next token from the underlying scanner.
 // If a token has been unscanned then read that instead.
 func (p *Parser) scan() scanner.Token {
@ -95,3 +114,37 @@ func (p *Parser) scan() scanner.Token {

 // unscan pushes the previously read token back onto the buffer.
 func (p *Parser) unscan() { p.n = 1 }
+
+// ----------------------------------------------------------------------------
+// Parsing support
+
+// printTrace prints a to standard output, preceded by the line and column of
+// p.tok and a dot leader that is 2*p.indent characters wide. It prints
+// nothing unless p.enableTrace is set.
+func (p *Parser) printTrace(a ...interface{}) {
+	if !p.enableTrace {
+		return
+	}
+
+	const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
+	const n = len(dots)
+	fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
+
+	// Emit the leader in whole copies of dots first, then the remainder.
+	i := 2 * p.indent
+	for i > n {
+		fmt.Print(dots)
+		i -= n
+	}
+	// i <= n
+	fmt.Print(dots[0:i])
+	fmt.Println(a...)
+}
+
+// trace prints msg and an opening "(" through p.printTrace, then increases
+// p.indent by one. It returns p so the result can be passed straight to un.
+func trace(p *Parser, msg string) *Parser {
+	p.printTrace(msg, "(")
+	p.indent++
+	return p
+}
+
+// un decreases p.indent by one and prints the closing ")" through p.printTrace.
+// Usage pattern: defer un(trace(p, "..."))
+func un(p *Parser) {
+	p.indent--
+	p.printTrace(")")
+}
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@ -0,0 +1,17 @@
+package parser
+
+import (
+	"fmt"
+	"testing"
+)
+
+// TestAssignStatment parses a single assignment with tracing enabled and
+// prints the resulting node. It makes no assertions, so it can only fail if
+// something panics.
+func TestAssignStatment(t *testing.T) {
+	src := `ami = "${var.foo}"`
+
+	p := New([]byte(src))
+	p.enableTrace = true
+	n := p.Parse()
+
+	fmt.Println(n)
+
+}
--- a/scanner/scanner.go
+++ b/scanner/scanner.go
@ -204,9 +204,9 @@ func (s *Scanner) Scan() Token {
 	s.tokStart = s.tokEnd // ensure idempotency of tokenText() call

 	return Token{
-		token: tok,
-		pos:   s.tokPos,
-		text:  tokenText,
+		Type: tok,
+		Pos:  s.tokPos,
+		Text: tokenText,
 	}
 }

--- a/scanner/scanner_test.go
+++ b/scanner/scanner_test.go
@ -270,7 +270,7 @@ func TestRealExample(t *testing.T) {
 	}`

 	literals := []struct {
-		token   TokenType
+		tokenType TokenType
 		literal   string
 	}{
 		{COMMENT, `// This comes from Terraform, as a test`},
@ -332,8 +332,8 @@ func TestRealExample(t *testing.T) {
 	s := New([]byte(complexHCL))
 	for _, l := range literals {
 		tok := s.Scan()
-		if l.token != tok.Type() {
-			t.Errorf("got: %s want %s for %s\n", tok, l.token, tok.String())
+		if l.tokenType != tok.Type {
+			t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String())
 		}

 		if l.literal != tok.String() {
@ -383,7 +383,7 @@ func testError(t *testing.T, src, pos, msg string, tok TokenType) {
 	}

 	tk := s.Scan()
-	if tk.Type() != tok {
+	if tk.Type != tok {
 		t.Errorf("tok = %s, want %s for %q", tk, tok, src)
 	}
 	if !errorCalled {
@ -404,7 +404,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) {
 	s := New(buf.Bytes())
 	for _, ident := range tokenList {
 		tok := s.Scan()
-		if tok.Type() != ident.tok {
+		if tok.Type != ident.tok {
 			t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text)
 		}

--- a/scanner/token.go
+++ b/scanner/token.go
@ -4,9 +4,9 @@ import "strconv"

 // Token defines a single HCL token which can be obtained via the Scanner
 type Token struct {
-	token TokenType
-	pos   Pos
-	text  string
+	Type TokenType // the token's type
+	Pos  Pos       // the token's position
+	Text string    // the token's literal text, returned by String
 }

 // TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
@ -86,19 +86,9 @@ func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end
 // delimiters; it returns false otherwise.
 func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end }

-// Type returns the token's type
-func (t Token) Type() TokenType {
-	return t.token
-}
-
-// Pos returns the token's position
-func (t Token) Pos() Pos {
-	return t.pos
-}
-
 // String returns the token's literal text. Note that this is only
 // applicable for certain token types, such as token.IDENT,
 // token.STRING, etc..
 func (t Token) String() string {
-	return t.text
+	return t.Text
 }