07cb426729
HCL supports #-style comments, whose marker is a single character long. We had assumed that a comment always starts with // or /*, both of which are two characters long.
369 lines
8.3 KiB
Go
369 lines
8.3 KiB
Go
// Package parser implements a parser for HCL (HashiCorp Configuration
|
|
// Language)
|
|
package parser
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
|
|
"github.com/fatih/hcl/ast"
|
|
"github.com/fatih/hcl/scanner"
|
|
"github.com/fatih/hcl/token"
|
|
)
|
|
|
|
type Parser struct {
|
|
sc *scanner.Scanner
|
|
|
|
// Last read token
|
|
tok token.Token
|
|
|
|
comments []*ast.CommentGroup
|
|
leadComment *ast.CommentGroup // last lead comment
|
|
lineComment *ast.CommentGroup // last line comment
|
|
|
|
enableTrace bool
|
|
indent int
|
|
n int // buffer size (max = 1)
|
|
}
|
|
|
|
func newParser(src []byte) *Parser {
|
|
return &Parser{
|
|
sc: scanner.New(src),
|
|
}
|
|
}
|
|
|
|
// Parse returns the fully parsed source and returns the abstract syntax tree.
|
|
func Parse(src []byte) (*ast.File, error) {
|
|
p := newParser(src)
|
|
return p.Parse()
|
|
}
|
|
|
|
// errEofToken is the sentinel error returned when an EOF token is read;
// objectList treats it as the regular end of the input rather than a failure.
var errEofToken = errors.New("EOF token found")
|
|
|
|
// Parse returns the fully parsed source and returns the abstract syntax tree.
|
|
func (p *Parser) Parse() (*ast.File, error) {
|
|
f := &ast.File{}
|
|
var err error
|
|
f.Node, err = p.objectList()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
f.Comments = p.comments
|
|
return f, nil
|
|
}
|
|
|
|
func (p *Parser) objectList() (*ast.ObjectList, error) {
|
|
defer un(trace(p, "ParseObjectList"))
|
|
node := &ast.ObjectList{}
|
|
|
|
for {
|
|
n, err := p.objectItem()
|
|
if err == errEofToken {
|
|
break // we are finished
|
|
}
|
|
|
|
// we don't return a nil node, because might want to use already
|
|
// collected items.
|
|
if err != nil {
|
|
return node, err
|
|
}
|
|
|
|
node.Add(n)
|
|
}
|
|
return node, nil
|
|
}
|
|
|
|
func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
|
|
endline = p.tok.Pos.Line
|
|
|
|
// count the endline if it's multiline comment, ie starting with /*
|
|
if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
|
|
// don't use range here - no need to decode Unicode code points
|
|
for i := 0; i < len(p.tok.Text); i++ {
|
|
if p.tok.Text[i] == '\n' {
|
|
endline++
|
|
}
|
|
}
|
|
}
|
|
|
|
comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
|
|
p.tok = p.sc.Scan()
|
|
return
|
|
}
|
|
|
|
func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
|
|
var list []*ast.Comment
|
|
endline = p.tok.Pos.Line
|
|
|
|
for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
|
|
var comment *ast.Comment
|
|
comment, endline = p.consumeComment()
|
|
list = append(list, comment)
|
|
}
|
|
|
|
// add comment group to the comments list
|
|
comments = &ast.CommentGroup{List: list}
|
|
p.comments = append(p.comments, comments)
|
|
|
|
return
|
|
}
|
|
|
|
// objectItem parses a single object item
|
|
func (p *Parser) objectItem() (*ast.ObjectItem, error) {
|
|
defer un(trace(p, "ParseObjectItem"))
|
|
|
|
keys, err := p.objectKey()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
o := &ast.ObjectItem{
|
|
Keys: keys,
|
|
}
|
|
|
|
if p.leadComment != nil {
|
|
o.LeadComment = p.leadComment
|
|
p.leadComment = nil
|
|
}
|
|
|
|
switch p.tok.Type {
|
|
case token.ASSIGN:
|
|
o.Assign = p.tok.Pos
|
|
o.Val, err = p.object()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
case token.LBRACE:
|
|
o.Val, err = p.objectType()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// do a look-ahead for line comment
|
|
p.scan()
|
|
if o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
|
|
o.LineComment = p.lineComment
|
|
p.lineComment = nil
|
|
}
|
|
|
|
p.unscan()
|
|
return o, nil
|
|
}
|
|
|
|
// objectKey parses an object key and returns a ObjectKey AST
|
|
func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
|
|
keyCount := 0
|
|
keys := make([]*ast.ObjectKey, 0)
|
|
|
|
for {
|
|
tok := p.scan()
|
|
switch tok.Type {
|
|
case token.EOF:
|
|
return nil, errEofToken
|
|
case token.ASSIGN:
|
|
// assignment or object only, but not nested objects. this is not
|
|
// allowed: `foo bar = {}`
|
|
if keyCount > 1 {
|
|
return nil, fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type)
|
|
}
|
|
|
|
if keyCount == 0 {
|
|
return nil, errors.New("no keys found!!!")
|
|
}
|
|
|
|
return keys, nil
|
|
case token.LBRACE:
|
|
// object
|
|
return keys, nil
|
|
case token.IDENT, token.STRING:
|
|
keyCount++
|
|
keys = append(keys, &ast.ObjectKey{Token: p.tok})
|
|
case token.ILLEGAL:
|
|
fmt.Println("illegal")
|
|
default:
|
|
return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type)
|
|
}
|
|
}
|
|
}
|
|
|
|
// object parses any type of object, such as number, bool, string, object or
|
|
// list.
|
|
func (p *Parser) object() (ast.Node, error) {
|
|
defer un(trace(p, "ParseType"))
|
|
tok := p.scan()
|
|
|
|
switch tok.Type {
|
|
case token.NUMBER, token.FLOAT, token.BOOL, token.STRING:
|
|
return p.literalType()
|
|
case token.LBRACE:
|
|
return p.objectType()
|
|
case token.LBRACK:
|
|
return p.listType()
|
|
case token.COMMENT:
|
|
// implement comment
|
|
case token.EOF:
|
|
return nil, errEofToken
|
|
}
|
|
|
|
return nil, fmt.Errorf("Unknown token: %+v", tok)
|
|
}
|
|
|
|
// objectType parses an object type and returns a ObjectType AST
|
|
func (p *Parser) objectType() (*ast.ObjectType, error) {
|
|
defer un(trace(p, "ParseObjectType"))
|
|
|
|
// we assume that the currently scanned token is a LBRACE
|
|
o := &ast.ObjectType{
|
|
Lbrace: p.tok.Pos,
|
|
}
|
|
|
|
l, err := p.objectList()
|
|
|
|
// if we hit RBRACE, we are good to go (means we parsed all Items), if it's
|
|
// not a RBRACE, it's an syntax error and we just return it.
|
|
if err != nil && p.tok.Type != token.RBRACE {
|
|
return nil, err
|
|
}
|
|
|
|
o.List = l
|
|
o.Rbrace = p.tok.Pos // advanced via parseObjectList
|
|
return o, nil
|
|
}
|
|
|
|
// listType parses a list type and returns a ListType AST
|
|
func (p *Parser) listType() (*ast.ListType, error) {
|
|
defer un(trace(p, "ParseListType"))
|
|
|
|
// we assume that the currently scanned token is a LBRACK
|
|
l := &ast.ListType{
|
|
Lbrack: p.tok.Pos,
|
|
}
|
|
|
|
for {
|
|
tok := p.scan()
|
|
switch tok.Type {
|
|
case token.NUMBER, token.FLOAT, token.STRING:
|
|
node, err := p.literalType()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
l.Add(node)
|
|
case token.COMMA:
|
|
// get next list item or we are at the end
|
|
continue
|
|
case token.BOOL:
|
|
// TODO(arslan) should we support? not supported by HCL yet
|
|
case token.LBRACK:
|
|
// TODO(arslan) should we support nested lists? Even though it's
|
|
// written in README of HCL, it's not a part of the grammar
|
|
// (not defined in parse.y)
|
|
case token.RBRACK:
|
|
// finished
|
|
l.Rbrack = p.tok.Pos
|
|
return l, nil
|
|
default:
|
|
return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type)
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
// literalType parses a literal type and returns a LiteralType AST
|
|
func (p *Parser) literalType() (*ast.LiteralType, error) {
|
|
defer un(trace(p, "ParseLiteral"))
|
|
|
|
return &ast.LiteralType{
|
|
Token: p.tok,
|
|
}, nil
|
|
}
|
|
|
|
// scan returns the next token from the underlying scanner. If a token has
|
|
// been unscanned then read that instead. In the process, it collects any
|
|
// comment groups encountered, and remembers the last lead and line comments.
|
|
func (p *Parser) scan() token.Token {
|
|
// If we have a token on the buffer, then return it.
|
|
if p.n != 0 {
|
|
p.n = 0
|
|
return p.tok
|
|
}
|
|
|
|
// Otherwise read the next token from the scanner and Save it to the buffer
|
|
// in case we unscan later.
|
|
prev := p.tok
|
|
p.tok = p.sc.Scan()
|
|
|
|
if p.tok.Type == token.COMMENT {
|
|
var comment *ast.CommentGroup
|
|
var endline int
|
|
|
|
// fmt.Printf("p.tok.Pos.Line = %+v prev: %d \n", p.tok.Pos.Line, prev.Pos.Line)
|
|
if p.tok.Pos.Line == prev.Pos.Line {
|
|
// The comment is on same line as the previous token; it
|
|
// cannot be a lead comment but may be a line comment.
|
|
comment, endline = p.consumeCommentGroup(0)
|
|
if p.tok.Pos.Line != endline {
|
|
// The next token is on a different line, thus
|
|
// the last comment group is a line comment.
|
|
p.lineComment = comment
|
|
}
|
|
}
|
|
|
|
// consume successor comments, if any
|
|
endline = -1
|
|
for p.tok.Type == token.COMMENT {
|
|
comment, endline = p.consumeCommentGroup(1)
|
|
}
|
|
|
|
if endline+1 == p.tok.Pos.Line {
|
|
// The next token is following on the line immediately after the
|
|
// comment group, thus the last comment group is a lead comment.
|
|
p.leadComment = comment
|
|
}
|
|
|
|
}
|
|
|
|
return p.tok
|
|
}
|
|
|
|
// unscan pushes the previously read token back onto the buffer.
|
|
func (p *Parser) unscan() {
|
|
p.n = 1
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Parsing support
|
|
|
|
func (p *Parser) printTrace(a ...interface{}) {
|
|
if !p.enableTrace {
|
|
return
|
|
}
|
|
|
|
const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
|
|
const n = len(dots)
|
|
fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
|
|
|
|
i := 2 * p.indent
|
|
for i > n {
|
|
fmt.Print(dots)
|
|
i -= n
|
|
}
|
|
// i <= n
|
|
fmt.Print(dots[0:i])
|
|
fmt.Println(a...)
|
|
}
|
|
|
|
func trace(p *Parser, msg string) *Parser {
|
|
p.printTrace(msg, "(")
|
|
p.indent++
|
|
return p
|
|
}
|
|
|
|
// Usage pattern: defer un(trace(p, "..."))
|
|
func un(p *Parser) {
|
|
p.indent--
|
|
p.printTrace(")")
|
|
}
|