parser: change how we obtain tokens

This commit is contained in:
Fatih Arslan 2015-10-12 02:27:43 +03:00
parent 99099cda62
commit f3dba35acc
6 changed files with 120 additions and 60 deletions

View File

@ -33,11 +33,11 @@ type Source struct {
nodes []Node nodes []Node
} }
func (s Source) add(node Node) { func (s *Source) add(node Node) {
s.nodes = append(s.nodes, node) s.nodes = append(s.nodes, node)
} }
func (s Source) String() string { func (s *Source) String() string {
buf := "" buf := ""
for _, n := range s.nodes { for _, n := range s.nodes {
buf += n.String() buf += n.String()
@ -46,7 +46,7 @@ func (s Source) String() string {
return buf return buf
} }
func (s Source) Pos() scanner.Pos { func (s *Source) Pos() scanner.Pos {
	// always returns the uninitialized position	// always returns the uninitialized position
return scanner.Pos{} return scanner.Pos{}
} }
@ -56,12 +56,12 @@ type IdentStatement struct {
token scanner.Token token scanner.Token
} }
func (i IdentStatement) String() string { func (i *IdentStatement) String() string {
return i.token.String() return i.token.String()
} }
func (i IdentStatement) Pos() scanner.Pos { func (i *IdentStatement) Pos() scanner.Pos {
return i.token.Pos() return i.token.Pos
} }
type BlockStatement struct { type BlockStatement struct {
@ -70,7 +70,7 @@ type BlockStatement struct {
list []Node // the nodes in lexical order list []Node // the nodes in lexical order
} }
func (b BlockStatement) String() string { func (b *BlockStatement) String() string {
s := "{\n" s := "{\n"
for _, n := range b.list { for _, n := range b.list {
s += n.String() + "\n" s += n.String() + "\n"
@ -80,7 +80,7 @@ func (b BlockStatement) String() string {
return s return s
} }
func (b BlockStatement) Pos() scanner.Pos { func (b *BlockStatement) Pos() scanner.Pos {
return b.lbrace return b.lbrace
} }
@ -91,11 +91,11 @@ type AssignStatement struct {
assign scanner.Pos // position of "=" assign scanner.Pos // position of "="
} }
func (a AssignStatement) String() string { func (a *AssignStatement) String() string {
return a.lhs.String() + " = " + a.rhs.String() return a.lhs.String() + " = " + a.rhs.String()
} }
func (a AssignStatement) Pos() scanner.Pos { func (a *AssignStatement) Pos() scanner.Pos {
return a.lhs.Pos() return a.lhs.Pos()
} }
@ -106,7 +106,7 @@ type ListStatement struct {
list []Node // the elements in lexical order list []Node // the elements in lexical order
} }
func (l ListStatement) String() string { func (l *ListStatement) String() string {
s := "[\n" s := "[\n"
for _, n := range l.list { for _, n := range l.list {
s += n.String() + ",\n" s += n.String() + ",\n"
@ -116,7 +116,7 @@ func (l ListStatement) String() string {
return s return s
} }
func (l ListStatement) Pos() scanner.Pos { func (l *ListStatement) Pos() scanner.Pos {
return l.lbrack return l.lbrack
} }
@ -126,7 +126,7 @@ type ObjectStatement struct {
BlockStatement BlockStatement
} }
func (o ObjectStatement) String() string { func (o *ObjectStatement) String() string {
s := "" s := ""
for i, n := range o.Idents { for i, n := range o.Idents {
@ -140,6 +140,6 @@ func (o ObjectStatement) String() string {
return s return s
} }
func (o ObjectStatement) Pos() scanner.Pos { func (o *ObjectStatement) Pos() scanner.Pos {
return o.Idents[0].Pos() return o.Idents[0].Pos()
} }

View File

@ -1,6 +1,10 @@
package parser package parser
import "github.com/fatih/hcl/scanner" import (
"fmt"
"github.com/fatih/hcl/scanner"
)
type Parser struct { type Parser struct {
sc *scanner.Scanner sc *scanner.Scanner
@ -8,10 +12,12 @@ type Parser struct {
tok scanner.Token // last read token tok scanner.Token // last read token
prevTok scanner.Token // previous read token prevTok scanner.Token // previous read token
n int // buffer size (max = 1) enableTrace bool
indent int
n int // buffer size (max = 1)
} }
func NewParser(src []byte) *Parser { func New(src []byte) *Parser {
return &Parser{ return &Parser{
sc: scanner.New(src), sc: scanner.New(src),
} }
@ -19,28 +25,37 @@ func NewParser(src []byte) *Parser {
// Parse parses the source and returns the abstract syntax tree.	// Parse parses the source and returns the abstract syntax tree.
func (p *Parser) Parse() Node { func (p *Parser) Parse() Node {
tok := p.scan() defer un(trace(p, "ParseSource"))
node := &Source{}
node := Source{} for {
// break if we hit the end
if p.tok.Type == scanner.EOF {
break
}
switch tok.Type() { if n := p.parseStatement(); n != nil {
case scanner.IDENT: node.add(n)
n := p.parseStatement() }
node.add(n)
case scanner.EOF:
} }
return node return node
} }
func (p *Parser) parseStatement() Node { func (p *Parser) parseStatement() Node {
defer un(trace(p, "ParseStatement"))
tok := p.scan() tok := p.scan()
if tok.Type().IsLiteral() { if tok.Type.IsLiteral() {
// found an object
if p.prevTok.Type.IsLiteral() {
return p.parseObject()
}
return p.parseIdent() return p.parseIdent()
} }
switch tok.Type() { switch tok.Type {
case scanner.LBRACE: case scanner.LBRACE:
return p.parseObject() return p.parseObject()
case scanner.LBRACK: case scanner.LBRACK:
@ -48,11 +63,25 @@ func (p *Parser) parseStatement() Node {
case scanner.ASSIGN: case scanner.ASSIGN:
return p.parseAssignment() return p.parseAssignment()
} }
return nil return nil
} }
func (p *Parser) parseAssignment() Node {
defer un(trace(p, "ParseAssignment"))
return &AssignStatement{
lhs: &IdentStatement{
token: p.prevTok,
},
assign: p.tok.Pos,
rhs: p.parseStatement(),
}
}
func (p *Parser) parseIdent() Node { func (p *Parser) parseIdent() Node {
return IdentStatement{ defer un(trace(p, "ParseIdent"))
return &IdentStatement{
token: p.tok, token: p.tok,
} }
} }
@ -65,16 +94,6 @@ func (p *Parser) parseList() Node {
return nil return nil
} }
func (p *Parser) parseAssignment() Node {
return AssignStatement{
lhs: IdentStatement{
token: p.prevTok,
},
assign: p.tok.Pos(),
rhs: p.parseStatement(),
}
}
// scan returns the next token from the underlying scanner. // scan returns the next token from the underlying scanner.
// If a token has been unscanned then read that instead. // If a token has been unscanned then read that instead.
func (p *Parser) scan() scanner.Token { func (p *Parser) scan() scanner.Token {
@ -95,3 +114,37 @@ func (p *Parser) scan() scanner.Token {
// unscan pushes the previously read token back onto the buffer. // unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() { p.n = 1 } func (p *Parser) unscan() { p.n = 1 }
// ----------------------------------------------------------------------------
// Parsing support
func (p *Parser) printTrace(a ...interface{}) {
if !p.enableTrace {
return
}
const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
const n = len(dots)
fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
i := 2 * p.indent
for i > n {
fmt.Print(dots)
i -= n
}
// i <= n
fmt.Print(dots[0:i])
fmt.Println(a...)
}
func trace(p *Parser, msg string) *Parser {
p.printTrace(msg, "(")
p.indent++
return p
}
// Usage pattern: defer un(trace(p, "..."))
func un(p *Parser) {
p.indent--
p.printTrace(")")
}

17
parser/parser_test.go Normal file
View File

@ -0,0 +1,17 @@
package parser
import (
"fmt"
"testing"
)
func TestAssignStatment(t *testing.T) {
src := `ami = "${var.foo}"`
p := New([]byte(src))
p.enableTrace = true
n := p.Parse()
fmt.Println(n)
}

View File

@ -204,9 +204,9 @@ func (s *Scanner) Scan() Token {
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
return Token{ return Token{
token: tok, Type: tok,
pos: s.tokPos, Pos: s.tokPos,
text: tokenText, Text: tokenText,
} }
} }

View File

@ -270,8 +270,8 @@ func TestRealExample(t *testing.T) {
}` }`
literals := []struct { literals := []struct {
token TokenType tokenType TokenType
literal string literal string
}{ }{
{COMMENT, `// This comes from Terraform, as a test`}, {COMMENT, `// This comes from Terraform, as a test`},
{IDENT, `variable`}, {IDENT, `variable`},
@ -332,8 +332,8 @@ func TestRealExample(t *testing.T) {
s := New([]byte(complexHCL)) s := New([]byte(complexHCL))
for _, l := range literals { for _, l := range literals {
tok := s.Scan() tok := s.Scan()
if l.token != tok.Type() { if l.tokenType != tok.Type {
t.Errorf("got: %s want %s for %s\n", tok, l.token, tok.String()) t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String())
} }
if l.literal != tok.String() { if l.literal != tok.String() {
@ -383,7 +383,7 @@ func testError(t *testing.T, src, pos, msg string, tok TokenType) {
} }
tk := s.Scan() tk := s.Scan()
if tk.Type() != tok { if tk.Type != tok {
t.Errorf("tok = %s, want %s for %q", tk, tok, src) t.Errorf("tok = %s, want %s for %q", tk, tok, src)
} }
if !errorCalled { if !errorCalled {
@ -404,7 +404,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) {
s := New(buf.Bytes()) s := New(buf.Bytes())
for _, ident := range tokenList { for _, ident := range tokenList {
tok := s.Scan() tok := s.Scan()
if tok.Type() != ident.tok { if tok.Type != ident.tok {
t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text)
} }

View File

@ -4,9 +4,9 @@ import "strconv"
// Token defines a single HCL token which can be obtained via the Scanner // Token defines a single HCL token which can be obtained via the Scanner
type Token struct { type Token struct {
token TokenType Type TokenType
pos Pos Pos Pos
text string Text string
} }
// TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language) // TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
@ -86,19 +86,9 @@ func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end
// delimiters; it returns false otherwise. // delimiters; it returns false otherwise.
func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end } func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end }
// Type returns the token's type
func (t Token) Type() TokenType {
return t.token
}
// Pos returns the token's position
func (t Token) Pos() Pos {
return t.pos
}
// String returns the token's literal text. Note that this is only // String returns the token's literal text. Note that this is only
// applicable for certain token types, such as token.IDENT, // applicable for certain token types, such as token.IDENT,
// token.STRING, etc.. // token.STRING, etc..
func (t Token) String() string { func (t Token) String() string {
return t.text return t.Text
} }