hclwrite: Simplify internal data structures

The original prototype of hclwrite tried to track both the tokens and
the AST as two parallel data structures. This quickly exploded in
complexity, leading to lots of messy code to manage keeping those two
structures in sync.

This new approach melds the two structures together, creating first a
physical token tree (made of "node" objects, and hidden from the caller)
and then attaching the AST nodes to that token tree as additional sidecar
data.

The result is much easier to work with, leading to less code in the parser
and considerably less complex data structures in the parser's tests.

This commit is enough to reach feature parity with the previous prototype,
but it remains a prototype. With a more usable foundation, we'll evolve
this into a more complete implementation in subsequent commits.
This commit is contained in:
Martin Atkins 2018-08-01 08:45:22 -07:00
parent b21bf61698
commit 77c0b55a59
11 changed files with 1182 additions and 2014 deletions

View File

@ -3,29 +3,25 @@ package hclwrite
import ( import (
"bytes" "bytes"
"io" "io"
"github.com/hashicorp/hcl2/hcl"
"github.com/zclconf/go-cty/cty"
) )
type Node interface { type File struct {
walkChildNodes(w internalWalkFunc) inTree
Tokens() *TokenSeq
srcBytes []byte
body *node
} }
type internalWalkFunc func(Node) // Body returns the root body of the file, which contains the top-level
// attributes and blocks.
type File struct { func (f *File) Body() *Body {
Name string return f.body.content.(*Body)
SrcBytes []byte
Body *Body
AllTokens *TokenSeq
} }
// WriteTo writes the tokens underlying the receiving file to the given writer. // WriteTo writes the tokens underlying the receiving file to the given writer.
func (f *File) WriteTo(wr io.Writer) (int, error) { func (f *File) WriteTo(wr io.Writer) (int, error) {
return f.AllTokens.WriteTo(wr) tokens := f.inTree.children.BuildTokens(nil)
return tokens.WriteTo(wr)
} }
// Bytes returns a buffer containing the source code resulting from the // Bytes returns a buffer containing the source code resulting from the
@ -37,169 +33,74 @@ func (f *File) Bytes() []byte {
return buf.Bytes() return buf.Bytes()
} }
// Format makes in-place modifications to the tokens underlying the receiving type comments struct {
// file in order to change the whitespace to be in canonical form. leafNode
func (f *File) Format() {
format(f.Body.AllTokens.Tokens()) parent *node
tokens Tokens
} }
type Body struct { func newComments(tokens Tokens) *comments {
// Items may contain Attribute, Block and Unstructured instances. return &comments{
// Items and AllTokens should be updated only by methods of this type, tokens: tokens,
// since they must be kept synchronized for correct operation.
Items []Node
AllTokens *TokenSeq
// IndentLevel is the number of spaces that should appear at the start
// of lines added within this body.
IndentLevel int
}
func (n *Body) walkChildNodes(w internalWalkFunc) {
for _, item := range n.Items {
w(item)
} }
} }
func (n *Body) Tokens() *TokenSeq { func (c *comments) BuildTokens(to Tokens) Tokens {
return n.AllTokens return c.tokens.BuildTokens(to)
} }
func (n *Body) AppendItem(node Node) { type identifier struct {
n.Items = append(n.Items, node) leafNode
n.AppendUnstructuredTokens(node.Tokens())
parent *node
token *Token
} }
func (n *Body) AppendUnstructuredTokens(seq *TokenSeq) { func newIdentifier(token *Token) *identifier {
if n.AllTokens == nil { return &identifier{
new := make(TokenSeq, 0, 1) token: token,
n.AllTokens = &new
}
*(n.AllTokens) = append(*(n.AllTokens), seq)
}
// FindAttribute returns the first attribute item from the body that has the
// given name, or returns nil if there is currently no matching attribute.
//
// A valid AST has only one definition of each attribute, but that constraint
// is not enforced in the hclwrite AST, so a tree that has been mutated by
// other calls may contain additional matching attributes that cannot be seen
// by this method.
func (n *Body) FindAttribute(name string) *Attribute {
nameBytes := []byte(name)
for _, item := range n.Items {
if attr, ok := item.(*Attribute); ok {
if attr.NameTokens.IsIdent(nameBytes) {
return attr
}
}
}
return nil
}
// SetAttributeValue either replaces the expression of an existing attribute
// of the given name or adds a new attribute definition to the end of the block.
//
// The value is given as a cty.Value, and must therefore be a literal. To set
// a variable reference or other traversal, use SetAttributeTraversal.
//
// The return value is the attribute that was either modified in-place or
// created.
func (n *Body) SetAttributeValue(name string, val cty.Value) *Attribute {
panic("Body.SetAttributeValue not yet implemented")
}
// SetAttributeTraversal either replaces the expression of an existing attribute
// of the given name or adds a new attribute definition to the end of the block.
//
// The new expression is given as a hcl.Traversal, which must be an absolute
// traversal. To set a literal value, use SetAttributeValue.
//
// The return value is the attribute that was either modified in-place or
// created.
func (n *Body) SetAttributeTraversal(name string, traversal hcl.Traversal) *Attribute {
panic("Body.SetAttributeTraversal not yet implemented")
}
type Attribute struct {
AllTokens *TokenSeq
LeadCommentTokens *TokenSeq
NameTokens *TokenSeq
EqualsTokens *TokenSeq
Expr *Expression
LineCommentTokens *TokenSeq
EOLTokens *TokenSeq
}
func (a *Attribute) walkChildNodes(w internalWalkFunc) {
w(a.Expr)
}
func (n *Attribute) Tokens() *TokenSeq {
return n.AllTokens
}
type Block struct {
AllTokens *TokenSeq
LeadCommentTokens *TokenSeq
TypeTokens *TokenSeq
LabelTokens []*TokenSeq
LabelTokensFlat *TokenSeq
OBraceTokens *TokenSeq
Body *Body
CBraceTokens *TokenSeq
EOLTokens *TokenSeq
}
func (n *Block) walkChildNodes(w internalWalkFunc) {
w(n.Body)
}
func (n *Block) Tokens() *TokenSeq {
return n.AllTokens
}
type Expression struct {
AllTokens *TokenSeq
AbsTraversals []*Traversal
}
func (n *Expression) walkChildNodes(w internalWalkFunc) {
for _, name := range n.AbsTraversals {
w(name)
} }
} }
func (n *Expression) Tokens() *TokenSeq { func (i *identifier) BuildTokens(to Tokens) Tokens {
return n.AllTokens return append(to, i.token)
} }
type Traversal struct { func (i *identifier) hasName(name string) bool {
AllTokens *TokenSeq return name == string(i.token.Bytes)
Steps []*Traverser
} }
func (n *Traversal) walkChildNodes(w internalWalkFunc) { type number struct {
for _, step := range n.Steps { leafNode
w(step)
parent *node
token *Token
}
func newNumber(token *Token) *number {
return &number{
token: token,
} }
} }
func (n *Traversal) Tokens() *TokenSeq { func (n *number) BuildTokens(to Tokens) Tokens {
return n.AllTokens return append(to, n.token)
} }
type Traverser struct { type quoted struct {
AllTokens *TokenSeq leafNode
Logical hcl.Traverser
parent *node
tokens Tokens
} }
func (n *Traverser) Tokens() *TokenSeq { func newQuoted(tokens Tokens) *quoted {
return n.AllTokens return &quoted{
tokens: tokens,
}
} }
func (n *Traverser) walkChildNodes(w internalWalkFunc) { func (q *quoted) BuildTokens(to Tokens) Tokens {
// No child nodes for a traversal step return q.tokens.BuildTokens(to)
} }

85
hclwrite/ast_body.go Normal file
View File

@ -0,0 +1,85 @@
package hclwrite
import (
"github.com/hashicorp/hcl2/hcl"
"github.com/zclconf/go-cty/cty"
)
// Body is the content type for a body: a sequence of attributes and blocks,
// interleaved with any unstructured tokens that separate them.
type Body struct {
// inTree supplies the ordered list of child nodes along with the
// walkChildNodes and BuildTokens implementations.
inTree
// items is the subset of the child nodes that were added via appendItem,
// as opposed to unstructured tokens.
items nodeSet
// indentLevel is the number of spaces that should appear at the start
// of lines added within this body.
indentLevel int
}
// appendItem attaches n to the end of the body's child list and also
// records it in the set of structured items, so that it can later be told
// apart from unstructured tokens.
func (b *Body) appendItem(n *node) {
	// children is promoted from the embedded inTree.
	b.children.AppendNode(n)
	b.items.Add(n)
}
// AppendUnstructuredTokens adds the given tokens to the end of the body as
// a child node that is not registered as one of the body's items.
func (b *Body) AppendUnstructuredTokens(ts Tokens) {
	// children is promoted from the embedded inTree; the node that Append
	// creates is intentionally not retained here.
	b.children.Append(ts)
}
// GetAttribute returns the attribute from the body that has the given name,
// or returns nil if there is currently no matching attribute.
//
// A valid body has only one definition of each attribute, but that
// constraint is not enforced here. If the body contains several attributes
// with the same name, the one that appears first in the body is returned,
// so repeated calls always produce the same answer.
func (b *Body) GetAttribute(name string) *Attribute {
	// b.items is an unordered set, so ranging over it directly would pick an
	// arbitrary match when names are duplicated. List recovers the order of
	// the underlying child list (and returns nil for an empty set).
	for _, n := range b.items.List() {
		attr, isAttr := n.content.(*Attribute)
		if !isAttr {
			continue
		}
		nameObj := attr.name.content.(*identifier)
		if nameObj.hasName(name) {
			// We've found it!
			return attr
		}
	}
	return nil
}
// SetAttributeValue either replaces the expression of an existing attribute
// of the given name or adds a new attribute definition to the end of the block.
//
// The value is given as a cty.Value, and must therefore be a literal. To set
// a variable reference or other traversal, use SetAttributeTraversal.
//
// The return value is the attribute that was either modified in-place or
// created.
//
// NOTE: this method is not yet implemented and currently always panics.
func (b *Body) SetAttributeValue(name string, val cty.Value) *Attribute {
panic("Body.SetAttributeValue not yet implemented")
}
// SetAttributeTraversal either replaces the expression of an existing attribute
// of the given name or adds a new attribute definition to the end of the block.
//
// The new expression is given as a hcl.Traversal, which must be an absolute
// traversal. To set a literal value, use SetAttributeValue.
//
// The return value is the attribute that was either modified in-place or
// created.
//
// NOTE: this method is not yet implemented and currently always panics.
func (b *Body) SetAttributeTraversal(name string, traversal hcl.Traversal) *Attribute {
panic("Body.SetAttributeTraversal not yet implemented")
}
// Attribute is the content type for a single attribute definition within
// a body.
//
// The named fields point at particular child nodes; the complete token
// sequence, including any unstructured tokens between them, lives in the
// embedded inTree's child list.
type Attribute struct {
inTree
// leadComments is the node holding comments attached before the attribute.
leadComments *node
// name is the node holding the attribute name; GetAttribute expects its
// content to be an *identifier.
name *node
// expr is the node holding the attribute's expression.
expr *node
// lineComments is the node holding comments attached after the
// expression. NOTE(review): presumably same-line comments — confirm
// against the parser.
lineComments *node
}
// Block is the content type for a nested block within a body.
//
// The named fields point at particular child nodes; the complete token
// sequence lives in the embedded inTree's child list.
type Block struct {
inTree
// leadComments is the node holding comments attached before the block.
leadComments *node
// typeName is the node holding the block type identifier.
typeName *node
// labels is the set of child nodes that hold the block's labels.
labels nodeSet
// open and close presumably refer to the brace nodes that delimit the
// body. NOTE(review): confirm whether the parser populates them.
open *node
// body is the node holding the block's nested body.
body *node
close *node
}

215
hclwrite/ast_body_test.go Normal file
View File

@ -0,0 +1,215 @@
package hclwrite
import (
"fmt"
"reflect"
"testing"
"github.com/davecgh/go-spew/spew"
"github.com/hashicorp/hcl2/hcl"
"github.com/hashicorp/hcl2/hcl/hclsyntax"
)
// TestBodyGetAttribute parses small configuration snippets and verifies that
// Body.GetAttribute locates the named attribute and that the tokens built
// from that attribute (including any attached lead comment) are as expected.
func TestBodyGetAttribute(t *testing.T) {
	// assignment builds the tokens for a one-character attribute assigned a
	// one-digit number and terminated by a newline, e.g. "a = 1\n".
	assignment := func(name, digit byte) Tokens {
		return Tokens{
			{Type: hclsyntax.TokenIdent, Bytes: []byte{name}, SpacesBefore: 0},
			{Type: hclsyntax.TokenEqual, Bytes: []byte{'='}, SpacesBefore: 1},
			{Type: hclsyntax.TokenNumberLit, Bytes: []byte{digit}, SpacesBefore: 1},
			{Type: hclsyntax.TokenNewline, Bytes: []byte{'\n'}, SpacesBefore: 0},
		}
	}

	// Recognized as a lead comment and so attached to the attribute.
	leadComment := &Token{
		Type:         hclsyntax.TokenComment,
		Bytes:        []byte("# b is a b\n"),
		SpacesBefore: 0,
	}

	tests := []struct {
		src  string
		name string
		want Tokens
	}{
		{"", "a", nil},
		{"a = 1\n", "a", assignment('a', '1')},
		{"a = 1\nb = 1\nc = 1\n", "a", assignment('a', '1')},
		{"a = 1\nb = 2\nc = 3\n", "b", assignment('b', '2')},
		{"a = 1\nb = 2\nc = 3\n", "c", assignment('c', '3')},
		{
			"a = 1\n# b is a b\nb = 2\nc = 3\n",
			"b",
			append(Tokens{leadComment}, assignment('b', '2')...),
		},
		{
			// The blank line detaches the comment from the attribute.
			"a = 1\n# not attached to a or b\n\nb = 2\nc = 3\n",
			"b",
			assignment('b', '2'),
		},
	}

	for _, test := range tests {
		t.Run(fmt.Sprintf("%s in %s", test.name, test.src), func(t *testing.T) {
			f, diags := ParseConfig([]byte(test.src), "", hcl.Pos{Line: 1, Column: 1})
			if len(diags) != 0 {
				for _, diag := range diags {
					t.Logf("- %s", diag.Error())
				}
				t.Fatalf("unexpected diagnostics")
			}

			attr := f.Body().GetAttribute(test.name)
			switch {
			case attr == nil && test.want != nil:
				t.Fatal("attribute not found, but want it to exist")
			case attr == nil:
				// Not found, and not expected to be: nothing more to check.
			case test.want == nil:
				t.Fatal("attribute found, but expecting not found")
			default:
				got := attr.BuildTokens(nil)
				if !reflect.DeepEqual(got, test.want) {
					t.Errorf("wrong result\ngot: %s\nwant: %s", spew.Sdump(got), spew.Sdump(test.want))
				}
			}
		})
	}
}

View File

@ -0,0 +1,68 @@
package hclwrite
import (
"github.com/hashicorp/hcl2/hcl"
"github.com/zclconf/go-cty/cty"
)
// Expression is the content type for an expression, such as the value of
// an attribute.
type Expression struct {
inTree
// absTraversals is the set of child nodes that hold traversals found
// within the expression.
absTraversals nodeSet
}
// newExpression returns an empty Expression whose child list and traversal
// set are initialized and ready to be populated.
func newExpression() *Expression {
	expr := new(Expression)
	expr.inTree = newInTree()
	expr.absTraversals = newNodeSet()
	return expr
}
// NewExpressionLiteral constructs an expression that represents the given
// literal value.
//
// NOTE: this function is not yet implemented and currently always panics.
func NewExpressionLiteral(val cty.Value) *Expression {
panic("NewExpressionLiteral not yet implemented")
}
// NewExpressionAbsTraversal constructs an expression that represents the
// given traversal, which must be absolute or this function will panic.
//
// It returns the new expression, mirroring NewExpressionLiteral; previously
// the function declared no result, so the expression it is documented to
// construct could never reach the caller.
//
// NOTE: this function is not yet implemented and currently always panics.
func NewExpressionAbsTraversal(traversal hcl.Traversal) *Expression {
	panic("NewExpressionAbsTraversal not yet implemented")
}
// Traversal is the content type for a traversal within an expression.
type Traversal struct {
inTree
// steps is the set of child nodes that represent the individual steps
// of the traversal. NOTE(review): populated by the parser, which is not
// fully visible here — confirm.
steps nodeSet
}
// newTraversal returns an empty Traversal whose child list and step set are
// initialized and ready to be populated.
func newTraversal() *Traversal {
	trav := new(Traversal)
	trav.inTree = newInTree()
	trav.steps = newNodeSet()
	return trav
}
// TraverseName is the content type for a traversal step that refers to
// something by name.
type TraverseName struct {
inTree
// name is the child node holding the name. NOTE(review): presumably an
// identifier node — confirm against the parser.
name *node
}
// newTraverseName returns an empty TraverseName with an initialized child
// list; the name node is left unset for the caller to fill in.
func newTraverseName() *TraverseName {
	step := new(TraverseName)
	step.inTree = newInTree()
	return step
}
// TraverseIndex is the content type for a traversal step that indexes into
// a value using a key.
type TraverseIndex struct {
inTree
// key is the child node holding the index key. NOTE(review): the kind of
// content stored here is not visible in this file — confirm against the
// parser.
key *node
}
// newTraverseIndex returns an empty TraverseIndex with an initialized child
// list; the key node is left unset for the caller to fill in.
func newTraverseIndex() *TraverseIndex {
	step := new(TraverseIndex)
	step.inTree = newInTree()
	return step
}

View File

@ -2,96 +2,46 @@ package hclwrite
import ( import (
"fmt" "fmt"
"reflect" "strings"
"testing"
"github.com/davecgh/go-spew/spew"
"github.com/hashicorp/hcl2/hcl/hclsyntax"
"github.com/hashicorp/hcl2/hcl"
) )
func TestBodyFindAttribute(t *testing.T) { type TestTreeNode struct {
tests := []struct { Type string
src string Val string
name string
want *TokenSeq
}{
{
"",
"a",
nil,
},
{
"a = 1\n",
"a",
&TokenSeq{
Tokens{
{
Type: hclsyntax.TokenIdent,
Bytes: []byte{'a'},
},
},
},
},
{
"a = 1\nb = 1\nc = 1\n",
"a",
&TokenSeq{
Tokens{
{
Type: hclsyntax.TokenIdent,
Bytes: []byte{'a'},
},
},
},
},
{
"a = 1\nb = 1\nc = 1\n",
"b",
&TokenSeq{
Tokens{
{
Type: hclsyntax.TokenIdent,
Bytes: []byte{'b'},
},
},
},
},
{
"a = 1\nb = 1\nc = 1\n",
"c",
&TokenSeq{
Tokens{
{
Type: hclsyntax.TokenIdent,
Bytes: []byte{'c'},
},
},
},
},
}
for _, test := range tests { Children []TestTreeNode
t.Run(fmt.Sprintf("%s in %s", test.name, test.src), func(t *testing.T) { }
f, diags := ParseConfig([]byte(test.src), "", hcl.Pos{Line: 1, Column: 1})
if len(diags) != 0 { func makeTestTree(n *node) (root TestTreeNode) {
for _, diag := range diags { const us = "hclwrite."
t.Logf("- %s", diag.Error()) const usPtr = "*hclwrite."
} root.Type = fmt.Sprintf("%T", n.content)
t.Fatalf("unexpected diagnostics") if strings.HasPrefix(root.Type, us) {
} root.Type = root.Type[len(us):]
} else if strings.HasPrefix(root.Type, usPtr) {
attr := f.Body.FindAttribute(test.name) root.Type = root.Type[len(usPtr):]
if attr == nil { }
if test.want != nil {
t.Errorf("attribute found, but expecting not found") type WithVal interface {
} testValue() string
} else { }
got := attr.NameTokens hasTestVal := false
if !reflect.DeepEqual(got, test.want) { if withVal, ok := n.content.(WithVal); ok {
t.Errorf("wrong result\ngot: %s\nwant: %s", spew.Sdump(got), spew.Sdump(test.want)) root.Val = withVal.testValue()
} hasTestVal = true
} }
})
} n.content.walkChildNodes(func(n *node) {
root.Children = append(root.Children, makeTestTree(n))
})
// If we didn't end up with any children then this is probably a leaf
// node, so we'll set its content value to its raw bytes if we didn't
// already set a test value.
if !hasTestVal && len(root.Children) == 0 {
toks := n.content.BuildTokens(nil)
root.Val = toks.testValue()
}
return root
} }

200
hclwrite/node.go Normal file
View File

@ -0,0 +1,200 @@
package hclwrite
import (
"fmt"
"github.com/google/go-cmp/cmp"
)
// node represents a node in the AST.
//
// A node wraps a nodeContent value and carries the links that position it
// within a doubly-linked list of sibling nodes.
type node struct {
// content is the payload wrapped by this node.
content nodeContent
// list is the list the node currently belongs to, or nil if the node is
// not attached to any list.
list *nodes
// before and after are the neighbouring nodes within list; each is nil
// at the corresponding end of the list.
before, after *node
}
// newNode wraps the given content in a fresh node that is not yet attached
// to any list.
func newNode(c nodeContent) *node {
	n := new(node)
	n.content = c
	return n
}
// Equal reports whether the receiver and other have equal content, as
// decided by cmp.Equal. List membership and neighbours are not compared.
func (n *node) Equal(other *node) bool {
	mine, theirs := n.content, other.content
	return cmp.Equal(mine, theirs)
}
// BuildTokens asks the node's content to append its tokens to "to" and
// returns the resulting sequence.
func (n *node) BuildTokens(to Tokens) Tokens {
	content := n.content
	return content.BuildTokens(to)
}
// Detach unlinks the receiver from the list that currently owns it, joining
// its former neighbours together and updating the list's end pointers where
// the receiver was the first or last element. Calling Detach on a node that
// is not in any list does nothing.
func (n *node) Detach() {
	owner := n.list
	if owner == nil {
		return
	}

	prev, next := n.before, n.after

	// Fix up the list's end pointers if we were at either end.
	if owner.first == n {
		owner.first = next
	}
	if owner.last == n {
		owner.last = prev
	}

	// Splice the neighbours together around us.
	if prev != nil {
		prev.after = next
	}
	if next != nil {
		next.before = prev
	}

	n.list, n.before, n.after = nil, nil, nil
}
// assertUnattached panics if the node currently belongs to a list. It is a
// guard against attaching the same node in two places.
func (n *node) assertUnattached() {
	if n.list == nil {
		return
	}
	msg := fmt.Sprintf("attempt to attach already-attached node %#v", n)
	panic(msg)
}
// nodeContent is the interface type implemented by all AST content types.
type nodeContent interface {
// walkChildNodes calls w for each child node of the content. Leaf
// content has no children and so never calls w.
walkChildNodes(w internalWalkFunc)
// BuildTokens appends the content's tokens to "to" and returns the
// extended sequence.
BuildTokens(to Tokens) Tokens
}
// nodes is a list of nodes.
//
// The list is doubly linked through each node's before and after fields;
// only its two ends are recorded here. Both are nil when the list is empty.
type nodes struct {
first, last *node
}
// BuildTokens appends the tokens of every node in the list, in list order,
// to "to" and returns the resulting sequence.
func (ns *nodes) BuildTokens(to Tokens) Tokens {
	cur := ns.first
	for cur != nil {
		to = cur.BuildTokens(to)
		cur = cur.after
	}
	return to
}
// Append wraps the given content in a new node, attaches that node to the
// end of the list, and returns it.
func (ns *nodes) Append(c nodeContent) *node {
	created := new(node)
	created.content = c
	ns.AppendNode(created)
	return created
}
// AppendNode attaches n to the end of the list, making it the new last
// element (and also the first, if the list was empty).
//
// No check is made that n is unattached; callers are expected to pass a
// node that does not already belong to a list.
func (ns *nodes) AppendNode(n *node) {
	// Link n after the current tail, if there is one.
	if tail := ns.last; tail != nil {
		tail.after = n
		n.before = tail
	}
	if ns.first == nil {
		ns.first = n
	}
	ns.last = n
	n.list = ns
}
// AppendUnstructuredTokens attaches the given tokens to the end of the list
// as a single node and returns that node. An empty token sequence is
// ignored: nothing is appended and nil is returned.
func (ns *nodes) AppendUnstructuredTokens(tokens Tokens) *node {
	if len(tokens) == 0 {
		return nil
	}
	// Append wraps the content in a fresh node and links it in, which is
	// exactly what is needed for a raw token sequence.
	return ns.Append(tokens)
}
// nodeSet is an unordered set of nodes. It is used to describe a set of nodes
// that all belong to the same list that have some role or characteristic
// in common.
//
// Use List to obtain the members in the order of the list they belong to.
type nodeSet map[*node]struct{}
// newNodeSet returns an empty, non-nil nodeSet that is ready for Add.
func newNodeSet() nodeSet {
	return nodeSet{}
}
// Has reports whether n is a member of the set. A nil set has no members.
func (ns nodeSet) Has(n *node) bool {
	// Looking up a key in a nil map is safe and reports "not present", so
	// the nil set needs no special case.
	_, present := ns[n]
	return present
}
// Add makes n a member of the set. The set must be non-nil, since writing
// to a nil map panics.
func (ns nodeSet) Add(n *node) {
	var member struct{}
	ns[n] = member
}
// Remove takes n out of the set. It is a no-op if n is not a member or if
// the set is nil, since delete tolerates both.
func (ns nodeSet) Remove(n *node) {
delete(ns, n)
}
// List returns the members of the set as a slice, ordered by their position
// in the list they belong to. It returns nil for an empty set.
//
// All members are assumed to belong to the same list, as the nodeSet
// contract requires; the list is discovered by inspecting an arbitrary
// member.
func (ns nodeSet) List() []*node {
	count := len(ns)
	if count == 0 {
		return nil
	}

	// Any member will do for finding the owning list.
	var owner *nodes
	for member := range ns {
		owner = member.list
		break
	}

	// Walk the whole owning list and keep the nodes that are in the set.
	// This is linear in the length of the list rather than the set, which
	// is acceptable because node lists are expected to stay small.
	ordered := make([]*node, 0, count)
	for cur := owner.first; cur != nil; cur = cur.after {
		if _, isMember := ns[cur]; isMember {
			ordered = append(ordered, cur)
		}
	}
	return ordered
}
// internalWalkFunc is the callback type accepted by walkChildNodes; it is
// invoked once for each child node visited.
type internalWalkFunc func(*node)
// inTree can be embedded into a content struct that has child nodes to get
// a standard implementation of the nodeContent interface and a record of
// a potential parent node.
type inTree struct {
// parent is the node this content is attached to, or nil if it is not
// attached (see assertUnattached).
parent *node
// children is the ordered list of child nodes. It is allocated by
// newInTree; the methods on inTree dereference it without a nil check.
children *nodes
}
// newInTree returns an inTree with an empty, allocated child list and no
// parent.
func newInTree() inTree {
	return inTree{children: new(nodes)}
}
// assertUnattached panics if the content already has a parent node, naming
// the type of that parent's content in the panic message.
func (it *inTree) assertUnattached() {
	if it.parent == nil {
		return
	}
	panic(fmt.Sprintf("node is already attached to %T", it.parent.content))
}
// walkChildNodes calls w once for each direct child node, in list order.
// It does not descend into grandchildren.
func (it *inTree) walkChildNodes(w internalWalkFunc) {
	child := it.children.first
	for child != nil {
		w(child)
		child = child.after
	}
}
// BuildTokens appends the tokens of all child nodes, in order, to "to" and
// returns the resulting sequence.
func (it *inTree) BuildTokens(to Tokens) Tokens {
	// The child list already knows how to flatten itself in order.
	return it.children.BuildTokens(to)
}
// leafNode can be embedded into a content struct to give it a do-nothing
// implementation of walkChildNodes. It supplies no BuildTokens method, so
// the embedding type must provide its own to satisfy nodeContent.
type leafNode struct {
}
// walkChildNodes does nothing, because leaf content has no child nodes; w is
// never called.
func (n *leafNode) walkChildNodes(w internalWalkFunc) {
}

View File

@ -1,10 +1,12 @@
package hclwrite package hclwrite
import ( import (
"fmt"
"sort" "sort"
"github.com/hashicorp/hcl2/hcl" "github.com/hashicorp/hcl2/hcl"
"github.com/hashicorp/hcl2/hcl/hclsyntax" "github.com/hashicorp/hcl2/hcl/hclsyntax"
"github.com/zclconf/go-cty/cty"
) )
// Our "parser" here is actually not doing any parsing of its own. Instead, // Our "parser" here is actually not doing any parsing of its own. Instead,
@ -49,18 +51,19 @@ func parse(src []byte, filename string, start hcl.Pos) (*File, hcl.Diagnostics)
} }
before, root, after := parseBody(file.Body.(*hclsyntax.Body), from) before, root, after := parseBody(file.Body.(*hclsyntax.Body), from)
ret := &File{
inTree: newInTree(),
return &File{ srcBytes: src,
Name: filename, body: root,
SrcBytes: src, }
Body: root, nodes := ret.inTree.children
AllTokens: &TokenSeq{ nodes.Append(before.Tokens())
before.Seq(), nodes.AppendNode(root)
root.AllTokens, nodes.Append(after.Tokens())
after.Seq(),
}, return ret, diags
}, nil
} }
type inputTokens struct { type inputTokens struct {
@ -76,6 +79,23 @@ func (it inputTokens) Partition(rng hcl.Range) (before, within, after inputToken
return return
} }
func (it inputTokens) PartitionType(ty hclsyntax.TokenType) (before, within, after inputTokens) {
for i, t := range it.writerTokens {
if t.Type == ty {
return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens))
}
}
panic(fmt.Sprintf("didn't find any token of type %s", ty))
}
func (it inputTokens) PartitionTypeSingle(ty hclsyntax.TokenType) (before inputTokens, found *Token, after inputTokens) {
before, within, after := it.PartitionType(ty)
if within.Len() != 1 {
panic("PartitionType found more than one token")
}
return before, within.Tokens()[0], after
}
// PartitionIncludeComments is like Partition except the returned "within" // PartitionIncludeComments is like Partition except the returned "within"
// range includes any lead and line comments associated with the range. // range includes any lead and line comments associated with the range.
func (it inputTokens) PartitionIncludingComments(rng hcl.Range) (before, within, after inputTokens) { func (it inputTokens) PartitionIncludingComments(rng hcl.Range) (before, within, after inputTokens) {
@ -133,8 +153,8 @@ func (it inputTokens) Len() int {
return len(it.nativeTokens) return len(it.nativeTokens)
} }
func (it inputTokens) Seq() *TokenSeq { func (it inputTokens) Tokens() Tokens {
return &TokenSeq{it.writerTokens} return it.writerTokens
} }
func (it inputTokens) Types() []hclsyntax.TokenType { func (it inputTokens) Types() []hclsyntax.TokenType {
@ -148,7 +168,7 @@ func (it inputTokens) Types() []hclsyntax.TokenType {
// parseBody locates the given body within the given input tokens and returns // parseBody locates the given body within the given input tokens and returns
// the resulting *Body object as well as the tokens that appeared before and // the resulting *Body object as well as the tokens that appeared before and
// after it. // after it.
func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *Body, inputTokens) { func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *node, inputTokens) {
before, within, after := from.PartitionIncludingComments(nativeBody.SrcRange) before, within, after := from.PartitionIncludingComments(nativeBody.SrcRange)
// The main AST doesn't retain the original source ordering of the // The main AST doesn't retain the original source ordering of the
@ -164,7 +184,10 @@ func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *Body
sort.Sort(nativeNodeSorter{nativeItems}) sort.Sort(nativeNodeSorter{nativeItems})
body := &Body{ body := &Body{
IndentLevel: 0, // TODO: deal with this inTree: newInTree(),
indentLevel: 0, // TODO: deal with this
items: newNodeSet(),
} }
remain := within remain := within
@ -172,24 +195,24 @@ func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *Body
beforeItem, item, afterItem := parseBodyItem(nativeItem, remain) beforeItem, item, afterItem := parseBodyItem(nativeItem, remain)
if beforeItem.Len() > 0 { if beforeItem.Len() > 0 {
body.AppendUnstructuredTokens(beforeItem.Seq()) body.AppendUnstructuredTokens(beforeItem.Tokens())
} }
body.AppendItem(item) body.appendItem(item)
remain = afterItem remain = afterItem
} }
if remain.Len() > 0 { if remain.Len() > 0 {
body.AppendUnstructuredTokens(remain.Seq()) body.AppendUnstructuredTokens(remain.Tokens())
} }
return before, body, after return before, newNode(body), after
} }
func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, Node, inputTokens) { func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, *node, inputTokens) {
before, leadComments, within, lineComments, newline, after := from.PartitionBlockItem(nativeItem.Range()) before, leadComments, within, lineComments, newline, after := from.PartitionBlockItem(nativeItem.Range())
var item Node var item *node
switch tItem := nativeItem.(type) { switch tItem := nativeItem.(type) {
case *hclsyntax.Attribute: case *hclsyntax.Attribute:
@ -204,90 +227,96 @@ func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, No
return before, item, after return before, item, after
} }
func parseAttribute(nativeAttr *hclsyntax.Attribute, from, leadComments, lineComments, newline inputTokens) *Attribute { func parseAttribute(nativeAttr *hclsyntax.Attribute, from, leadComments, lineComments, newline inputTokens) *node {
var allTokens TokenSeq attr := &Attribute{
attr := &Attribute{} inTree: newInTree(),
}
children := attr.inTree.children
if leadComments.Len() > 0 { {
attr.LeadCommentTokens = leadComments.Seq() cn := newNode(newComments(leadComments.Tokens()))
allTokens = append(allTokens, attr.LeadCommentTokens) attr.leadComments = cn
children.AppendNode(cn)
} }
before, nameTokens, from := from.Partition(nativeAttr.NameRange) before, nameTokens, from := from.Partition(nativeAttr.NameRange)
if before.Len() > 0 { {
allTokens = append(allTokens, before.Seq()) children.AppendUnstructuredTokens(before.Tokens())
if nameTokens.Len() != 1 {
// Should never happen with valid input
panic("attribute name is not exactly one token")
}
token := nameTokens.Tokens()[0]
in := newNode(newIdentifier(token))
attr.name = in
children.AppendNode(in)
} }
attr.NameTokens = nameTokens.Seq()
allTokens = append(allTokens, attr.NameTokens)
before, equalsTokens, from := from.Partition(nativeAttr.EqualsRange) before, equalsTokens, from := from.Partition(nativeAttr.EqualsRange)
if before.Len() > 0 { children.AppendUnstructuredTokens(before.Tokens())
allTokens = append(allTokens, before.Seq()) children.AppendUnstructuredTokens(equalsTokens.Tokens())
}
attr.EqualsTokens = equalsTokens.Seq()
allTokens = append(allTokens, attr.EqualsTokens)
before, exprTokens, from := from.Partition(nativeAttr.Expr.Range()) before, exprTokens, from := from.Partition(nativeAttr.Expr.Range())
if before.Len() > 0 { {
allTokens = append(allTokens, before.Seq()) children.AppendUnstructuredTokens(before.Tokens())
} exprNode := parseExpression(nativeAttr.Expr, exprTokens)
attr.Expr = parseExpression(nativeAttr.Expr, exprTokens) attr.expr = exprNode
allTokens = append(allTokens, attr.Expr.AllTokens) children.AppendNode(exprNode)
if lineComments.Len() > 0 {
attr.LineCommentTokens = lineComments.Seq()
allTokens = append(allTokens, attr.LineCommentTokens)
} }
if newline.Len() > 0 { {
attr.EOLTokens = newline.Seq() cn := newNode(newComments(lineComments.Tokens()))
allTokens = append(allTokens, attr.EOLTokens) attr.lineComments = cn
children.AppendNode(cn)
} }
children.AppendUnstructuredTokens(newline.Tokens())
// Collect any stragglers, though there shouldn't be any // Collect any stragglers, though there shouldn't be any
if from.Len() > 0 { children.AppendUnstructuredTokens(from.Tokens())
allTokens = append(allTokens, from.Seq())
}
attr.AllTokens = &allTokens return newNode(attr)
return attr
} }
func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments, newline inputTokens) *Block { func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments, newline inputTokens) *node {
var allTokens TokenSeq block := &Block{
block := &Block{} inTree: newInTree(),
labels: newNodeSet(),
}
children := block.inTree.children
if leadComments.Len() > 0 { {
block.LeadCommentTokens = leadComments.Seq() cn := newNode(newComments(leadComments.Tokens()))
allTokens = append(allTokens, block.LeadCommentTokens) block.leadComments = cn
children.AppendNode(cn)
} }
before, typeTokens, from := from.Partition(nativeBlock.TypeRange) before, typeTokens, from := from.Partition(nativeBlock.TypeRange)
if before.Len() > 0 { {
allTokens = append(allTokens, before.Seq()) children.AppendUnstructuredTokens(before.Tokens())
if typeTokens.Len() != 1 {
// Should never happen with valid input
panic("block type name is not exactly one token")
}
token := typeTokens.Tokens()[0]
in := newNode(newIdentifier(token))
block.typeName = in
children.AppendNode(in)
} }
block.TypeTokens = typeTokens.Seq()
allTokens = append(allTokens, block.TypeTokens)
for _, rng := range nativeBlock.LabelRanges { for _, rng := range nativeBlock.LabelRanges {
var labelTokens inputTokens var labelTokens inputTokens
before, labelTokens, from = from.Partition(rng) before, labelTokens, from = from.Partition(rng)
if before.Len() > 0 { children.AppendUnstructuredTokens(before.Tokens())
allTokens = append(allTokens, before.Seq()) tokens := labelTokens.Tokens()
} ln := newNode(newQuoted(tokens))
seq := labelTokens.Seq() block.labels.Add(ln)
block.LabelTokens = append(block.LabelTokens, seq) children.AppendNode(ln)
*(block.LabelTokensFlat) = append(*(block.LabelTokensFlat), seq)
allTokens = append(allTokens, seq)
} }
before, oBrace, from := from.Partition(nativeBlock.OpenBraceRange) before, oBrace, from := from.Partition(nativeBlock.OpenBraceRange)
if before.Len() > 0 { children.AppendUnstructuredTokens(before.Tokens())
allTokens = append(allTokens, before.Seq()) children.AppendUnstructuredTokens(oBrace.Tokens())
}
block.OBraceTokens = oBrace.Seq()
allTokens = append(allTokens, block.OBraceTokens)
// We go a bit out of order here: we go hunting for the closing brace // We go a bit out of order here: we go hunting for the closing brace
// so that we have a delimited body, but then we'll deal with the body // so that we have a delimited body, but then we'll deal with the body
@ -295,87 +324,109 @@ func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments,
// that appear after it. // that appear after it.
bodyTokens, cBrace, from := from.Partition(nativeBlock.CloseBraceRange) bodyTokens, cBrace, from := from.Partition(nativeBlock.CloseBraceRange)
before, body, after := parseBody(nativeBlock.Body, bodyTokens) before, body, after := parseBody(nativeBlock.Body, bodyTokens)
children.AppendUnstructuredTokens(before.Tokens())
block.body = body
children.AppendNode(body)
children.AppendUnstructuredTokens(after.Tokens())
if before.Len() > 0 { children.AppendUnstructuredTokens(cBrace.Tokens())
allTokens = append(allTokens, before.Seq())
}
block.Body = body
allTokens = append(allTokens, body.AllTokens)
if after.Len() > 0 {
allTokens = append(allTokens, after.Seq())
}
block.CBraceTokens = cBrace.Seq()
allTokens = append(allTokens, block.CBraceTokens)
// stragglers // stragglers
if after.Len() > 0 { children.AppendUnstructuredTokens(from.Tokens())
allTokens = append(allTokens, from.Seq())
}
if lineComments.Len() > 0 { if lineComments.Len() > 0 {
// blocks don't actually have line comments, so we'll just treat // blocks don't actually have line comments, so we'll just treat
// them as extra stragglers // them as extra stragglers
allTokens = append(allTokens, lineComments.Seq()) children.AppendUnstructuredTokens(lineComments.Tokens())
}
if newline.Len() > 0 {
block.EOLTokens = newline.Seq()
allTokens = append(allTokens, block.EOLTokens)
} }
children.AppendUnstructuredTokens(newline.Tokens())
block.AllTokens = &allTokens return newNode(block)
return block
} }
func parseExpression(nativeExpr hclsyntax.Expression, from inputTokens) *Expression { func parseExpression(nativeExpr hclsyntax.Expression, from inputTokens) *node {
var allTokens TokenSeq expr := newExpression()
children := expr.inTree.children
nativeVars := nativeExpr.Variables() nativeVars := nativeExpr.Variables()
var absTraversals []*Traversal
for _, nativeTraversal := range nativeVars { for _, nativeTraversal := range nativeVars {
var traversalTokens TokenSeq before, traversal, after := parseTraversal(nativeTraversal, from)
var before, traversalFrom inputTokens children.AppendUnstructuredTokens(before.Tokens())
before, traversalFrom, from = from.Partition(nativeTraversal.SourceRange()) children.AppendNode(traversal)
if before.Len() > 0 { expr.absTraversals.Add(traversal)
allTokens = append(allTokens, before.Seq()) from = after
}
var steps []*Traverser
for _, nativeStep := range nativeTraversal {
var stepFrom inputTokens
before, stepFrom, traversalFrom = traversalFrom.Partition(nativeStep.SourceRange())
stepTokens := stepFrom.Seq()
if before.Len() > 0 {
traversalTokens = append(traversalTokens, before.Seq())
}
traversalTokens = append(traversalTokens, stepTokens)
step := &Traverser{
AllTokens: stepTokens,
Logical: nativeStep,
}
steps = append(steps, step)
}
// Attach any stragglers that don't belong to a step to the traversal itself.
if traversalFrom.Len() > 0 {
traversalTokens = append(traversalTokens, traversalFrom.Seq())
}
allTokens = append(allTokens, &traversalTokens)
absTraversals = append(absTraversals, &Traversal{
AllTokens: &traversalTokens,
Steps: steps,
})
} }
// Attach any stragglers that don't belong to a traversal to the expression // Attach any stragglers that don't belong to a traversal to the expression
// itself. In an expression with no traversals at all, this is just the // itself. In an expression with no traversals at all, this is just the
// entirety of "from". // entirety of "from".
if from.Len() > 0 { children.AppendUnstructuredTokens(from.Tokens())
allTokens = append(allTokens, from.Seq())
return newNode(expr)
}
func parseTraversal(nativeTraversal hcl.Traversal, from inputTokens) (before inputTokens, n *node, after inputTokens) {
traversal := newTraversal()
children := traversal.inTree.children
before, from, after = from.Partition(nativeTraversal.SourceRange())
stepAfter := from
for _, nativeStep := range nativeTraversal {
before, step, after := parseTraversalStep(nativeStep, stepAfter)
children.AppendUnstructuredTokens(before.Tokens())
children.AppendNode(step)
stepAfter = after
} }
return &Expression{ return before, newNode(traversal), after
AllTokens: &allTokens, }
AbsTraversals: absTraversals,
func parseTraversalStep(nativeStep hcl.Traverser, from inputTokens) (before inputTokens, n *node, after inputTokens) {
var children *nodes
switch tNativeStep := nativeStep.(type) {
case hcl.TraverseRoot, hcl.TraverseAttr:
step := newTraverseName()
children = step.inTree.children
before, from, after = from.Partition(nativeStep.SourceRange())
inBefore, token, inAfter := from.PartitionTypeSingle(hclsyntax.TokenIdent)
name := newIdentifier(token)
children.AppendUnstructuredTokens(inBefore.Tokens())
step.name = children.Append(name)
children.AppendUnstructuredTokens(inAfter.Tokens())
return before, newNode(step), after
case hcl.TraverseIndex:
step := newTraverseIndex()
children = step.inTree.children
before, from, after = from.Partition(nativeStep.SourceRange())
var inBefore, oBrack, keyTokens, cBrack inputTokens
inBefore, oBrack, from = from.PartitionType(hclsyntax.TokenOBrack)
children.AppendUnstructuredTokens(inBefore.Tokens())
children.AppendUnstructuredTokens(oBrack.Tokens())
keyTokens, cBrack, from = from.PartitionType(hclsyntax.TokenCBrack)
keyVal := tNativeStep.Key
switch keyVal.Type() {
case cty.String:
key := newQuoted(keyTokens.Tokens())
step.key = children.Append(key)
case cty.Number:
valBefore, valToken, valAfter := keyTokens.PartitionTypeSingle(hclsyntax.TokenNumberLit)
children.AppendUnstructuredTokens(valBefore.Tokens())
key := newNumber(valToken)
step.key = children.Append(key)
children.AppendUnstructuredTokens(valAfter.Tokens())
} }
children.AppendUnstructuredTokens(cBrack.Tokens())
children.AppendUnstructuredTokens(from.Tokens())
return before, newNode(step), after
default:
panic(fmt.Sprintf("unsupported traversal step type %T", nativeStep))
}
} }
// writerTokens takes a sequence of tokens as produced by the main hclsyntax // writerTokens takes a sequence of tokens as produced by the main hclsyntax

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,20 @@ import (
"github.com/hashicorp/hcl2/hcl" "github.com/hashicorp/hcl2/hcl"
) )
// NewFile creates a new file object that is empty and ready to have
// constructs added to it.
//
// The returned file owns a fresh root body at indent level zero, which is
// registered as a child of the file's own tree.
func NewFile() *File {
	rootBody := new(Body)
	rootBody.inTree = newInTree()
	rootBody.indentLevel = 0

	ret := new(File)
	ret.inTree = newInTree()
	ret.body = ret.inTree.children.Append(rootBody)
	return ret
}
// ParseConfig interprets the given source bytes into a *hclwrite.File. The // ParseConfig interprets the given source bytes into a *hclwrite.File. The
// resulting AST can be used to perform surgical edits on the source code // resulting AST can be used to perform surgical edits on the source code
// before turning it back into bytes again. // before turning it back into bytes again.
@ -25,6 +39,6 @@ func Format(src []byte) []byte {
tokens := lexConfig(src) tokens := lexConfig(src)
format(tokens) format(tokens)
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
(&TokenSeq{tokens}).WriteTo(buf) tokens.WriteTo(buf)
return buf.Bytes() return buf.Bytes()
} }

View File

@ -4,11 +4,13 @@ import (
"bytes" "bytes"
"testing" "testing"
"github.com/hashicorp/hcl2/hcl/hclsyntax" "github.com/sergi/go-diff/diffmatchpatch"
"github.com/hashicorp/hcl2/hcl"
"github.com/zclconf/go-cty/cty" "github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/function" "github.com/zclconf/go-cty/cty/function"
"github.com/zclconf/go-cty/cty/function/stdlib" "github.com/zclconf/go-cty/cty/function/stdlib"
"github.com/hashicorp/hcl2/hcl"
"github.com/hashicorp/hcl2/hcl/hclsyntax"
) )
func TestRoundTripVerbatim(t *testing.T) { func TestRoundTripVerbatim(t *testing.T) {
@ -68,7 +70,10 @@ block {
result := wr.Bytes() result := wr.Bytes()
if !bytes.Equal(result, src) { if !bytes.Equal(result, src) {
t.Errorf("wrong result\nresult:\n%s\ninput:\n%s", result, src) dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(string(src), string(result), false)
//t.Errorf("wrong result\nresult:\n%s\ninput:\n%s", result, src)
t.Errorf("wrong result\ndiff: (red indicates missing lines, and green indicates unexpected lines)\n%s", dmp.DiffPrettyText(diffs))
} }
}) })
} }

View File

@ -8,19 +8,6 @@ import (
"github.com/hashicorp/hcl2/hcl/hclsyntax" "github.com/hashicorp/hcl2/hcl/hclsyntax"
) )
// TokenGen is an abstract type that can append tokens to a list. It is the
// low-level foundation underlying the hclwrite AST; the AST provides a
// convenient abstraction over raw token sequences to facilitate common tasks,
// but it's also possible to directly manipulate the tree of token generators
// to make changes that the AST API doesn't directly allow.
type TokenGen interface {
// EachToken passes each token produced by the generator to the given
// callback.
EachToken(TokenCallback)
}
// TokenCallback is used with TokenGen implementations to specify the action
// that is to be taken for each token in the flattened token sequence.
type TokenCallback func(*Token)
// Token is a single sequence of bytes annotated with a type. It is similar // Token is a single sequence of bytes annotated with a type. It is similar
// in purpose to hclsyntax.Token, but discards the source position information // in purpose to hclsyntax.Token, but discards the source position information
// since that is not useful in code generation. // since that is not useful in code generation.
@ -38,17 +25,16 @@ type Token struct {
// Tokens is a flat list of tokens. // Tokens is a flat list of tokens.
type Tokens []*Token type Tokens []*Token
// WriteTo wraps the receiving tokens in a single-element TokenSeq and
// delegates to that sequence's WriteTo, returning its results unchanged.
func (ts Tokens) WriteTo(wr io.Writer) (int, error) {
	return (&TokenSeq{ts}).WriteTo(wr)
}
func (ts Tokens) Bytes() []byte { func (ts Tokens) Bytes() []byte {
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
ts.WriteTo(buf) ts.WriteTo(buf)
return buf.Bytes() return buf.Bytes()
} }
// testValue returns the result of Bytes for the receiving tokens, converted
// to a string.
func (ts Tokens) testValue() string {
	raw := ts.Bytes()
	return string(raw)
}
// Columns returns the number of columns (grapheme clusters) the token sequence // Columns returns the number of columns (grapheme clusters) the token sequence
// occupies. The result is not meaningful if there are newline or single-line // occupies. The result is not meaningful if there are newline or single-line
// comment tokens in the sequence. // comment tokens in the sequence.
@ -62,43 +48,10 @@ func (ts Tokens) Columns() int {
return ret return ret
} }
// TokenSeq combines zero or more TokenGens together to produce a flat sequence
// of tokens from a tree of TokenGens.
type TokenSeq []TokenGen
// EachToken calls the given callback exactly once, passing the receiving
// token itself.
func (t *Token) EachToken(cb TokenCallback) {
cb(t)
}
// EachToken calls the given callback once for every token in the list, in
// list order.
func (ts Tokens) EachToken(cb TokenCallback) {
	for i := 0; i < len(ts); i++ {
		cb(ts[i])
	}
}
// EachToken asks each generator in the sequence, in order, to pass its tokens
// to the given callback. A nil sequence produces no tokens.
func (ts *TokenSeq) EachToken(cb TokenCallback) {
	if ts != nil {
		gens := *ts
		for i := range gens {
			gens[i].EachToken(cb)
		}
	}
}
// Tokens flattens the receiving token sequence into a single list, holding
// the tokens in the order that EachToken visits them. The result is nil when
// no tokens are visited.
func (ts *TokenSeq) Tokens() Tokens {
	var flat Tokens
	collect := func(tok *Token) {
		flat = append(flat, tok)
	}
	ts.EachToken(collect)
	return flat
}
// WriteTo takes an io.Writer and writes the bytes for each token to it, // WriteTo takes an io.Writer and writes the bytes for each token to it,
// along with the spacing that separates each token. In other words, this // along with the spacing that separates each token. In other words, this
// allows serializing the tokens to a file or other such byte stream. // allows serializing the tokens to a file or other such byte stream.
func (ts *TokenSeq) WriteTo(wr io.Writer) (int, error) { func (ts Tokens) WriteTo(wr io.Writer) (int, error) {
// We know we're going to be writing a lot of small chunks of repeated // We know we're going to be writing a lot of small chunks of repeated
// space characters, so we'll prepare a buffer of these that we can // space characters, so we'll prepare a buffer of these that we can
// easily pass to wr.Write without any further allocation. // easily pass to wr.Write without any further allocation.
@ -109,9 +62,9 @@ func (ts *TokenSeq) WriteTo(wr io.Writer) (int, error) {
var n int var n int
var err error var err error
ts.EachToken(func(token *Token) { for _, token := range ts {
if err != nil { if err != nil {
return return n, err
} }
for spacesBefore := token.SpacesBefore; spacesBefore > 0; spacesBefore -= len(spaces) { for spacesBefore := token.SpacesBefore; spacesBefore > 0; spacesBefore -= len(spaces) {
@ -123,48 +76,22 @@ func (ts *TokenSeq) WriteTo(wr io.Writer) (int, error) {
thisN, err = wr.Write(spaces[:thisChunk]) thisN, err = wr.Write(spaces[:thisChunk])
n += thisN n += thisN
if err != nil { if err != nil {
return return n, err
} }
} }
var thisN int var thisN int
thisN, err = wr.Write(token.Bytes) thisN, err = wr.Write(token.Bytes)
n += thisN n += thisN
}) }
return n, err return n, err
} }
// SoloToken returns the single token represented by the receiving sequence, func (ts Tokens) walkChildNodes(w internalWalkFunc) {
// or nil if the sequence does not represent exactly one token. // Unstructured tokens have no child nodes
func (ts *TokenSeq) SoloToken() *Token {
var ret *Token
found := false
ts.EachToken(func(tok *Token) {
if ret == nil && !found {
ret = tok
found = true
} else if ret != nil && found {
ret = nil
}
})
return ret
} }
// IsIdent returns true if and only if the token sequence represents a single func (ts Tokens) BuildTokens(to Tokens) Tokens {
// ident token whose name matches the given string. return append(to, ts...)
func (ts *TokenSeq) IsIdent(name []byte) bool {
tok := ts.SoloToken()
if tok == nil {
return false
}
if tok.Type != hclsyntax.TokenIdent {
return false
}
return bytes.Equal(tok.Bytes, name)
} }
// TokenSeqEmpty is a TokenSeq that contains no tokens. It can be used anywhere,
// but its primary purpose is to be assigned as a replacement for a non-empty
// TokenSeq when eliminating a section of an input file.
var TokenSeqEmpty = TokenSeq([]TokenGen(nil))