hclpack: Implement JSON marshaling (but not unmarshaling, yet)

2018-11-11 03:25:19 +00:00 · 2018-11-11 03:25:19 +00:00 · 309e278914
commit 309e278914
parent ed7453e277
9 changed files with 494 additions and 18 deletions
--- a/go.mod
+++ b/go.mod
@ -7,6 +7,7 @@ require (
 	github.com/agext/levenshtein v1.2.1
 	github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3
 	github.com/apparentlymart/go-textseg v1.0.0
+	github.com/bsm/go-vlq v0.0.0-20150828105119-ec6e8d4f5f4e
 	github.com/davecgh/go-spew v1.1.1
 	github.com/go-test/deep v1.0.1
 	github.com/google/go-cmp v0.2.0
--- a/go.sum
+++ b/go.sum
@ -6,6 +6,8 @@ github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3 h1:ZSTrOEhi
 github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3/go.mod h1:oL81AME2rN47vu18xqj1S1jPIPuN7afo62yKTNn3XMM=
 github.com/apparentlymart/go-textseg v1.0.0 h1:rRmlIsPEEhUTIKQb7T++Nz/A5Q6C9IuX2wFoYVvnCs0=
 github.com/apparentlymart/go-textseg v1.0.0/go.mod h1:z96Txxhf3xSFMPmb5X/1W05FF/Nj9VFpLOpjS5yuumk=
+github.com/bsm/go-vlq v0.0.0-20150828105119-ec6e8d4f5f4e h1:D64GF/Xr5zSUnM3q1Jylzo4sK7szhP/ON+nb2DB5XJA=
+github.com/bsm/go-vlq v0.0.0-20150828105119-ec6e8d4f5f4e/go.mod h1:N+BjUcTjSxc2mtRGSCPsat1kze3CUtvJN3/jTXlp29k=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/go-test/deep v1.0.1 h1:UQhStjbkDClarlmv0am7OXXO4/GaPdCGiUiMTvi28sg=
--- a/hclpack/example_test.go
+++ b/hclpack/example_test.go
@ -0,0 +1,129 @@
+package hclpack_test
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"github.com/hashicorp/hcl2/hcl"
+	"github.com/hashicorp/hcl2/hclpack"
+)
+
+// Example_marshalJSON packs a small native-syntax configuration with
+// PackNativeFile and prints the JSON produced by Body.MarshalJSON.
+//
+// MarshalJSON emits the compact form; the example re-indents it purely so
+// that the expected output below is readable.
+func Example_marshalJSON() {
+	src := `
+	service "example" {
+	  priority = 2
+	  platform {
+		os   = "linux"
+		arch = "amd64"
+	  }
+	  process "web" {
+	    exec = ["./webapp"]
+	  }
+	  process "worker" {
+	    exec = ["./worker"]
+	  }
+	}
+	`
+
+	body, diags := hclpack.PackNativeFile([]byte(src), "example.svc", hcl.Pos{Line: 1, Column: 1})
+	if diags.HasErrors() {
+		fmt.Fprintf(os.Stderr, "Failed to parse: %s", diags.Error())
+		return
+	}
+
+	jb, err := body.MarshalJSON()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to marshal: %s", err)
+		return
+	}
+
+	// Normally the compact form is best, but we'll indent just for the sake
+	// of this example so the result is readable.
+	var buf bytes.Buffer
+	if err := json.Indent(&buf, jb, "", " "); err != nil {
+		// Report the failure rather than printing a partial buffer.
+		fmt.Fprintf(os.Stderr, "Failed to indent: %s", err)
+		return
+	}
+	os.Stdout.Write(buf.Bytes())
+
+	// Output:
+	// {
+	//  "r": {
+	//   "b": [
+	//    {
+	//     "h": [
+	//      "service",
+	//      "example"
+	//     ],
+	//     "b": {
+	//      "a": {
+	//       "priority": {
+	//        "s": "2",
+	//        "r": "ChAKDA4QDhA"
+	//       }
+	//      },
+	//      "b": [
+	//       {
+	//        "h": [
+	//         "platform"
+	//        ],
+	//        "b": {
+	//         "a": {
+	//          "arch": {
+	//           "s": "\"amd64\"",
+	//           "r": "IiwiJCYsKCo"
+	//          },
+	//          "os": {
+	//           "s": "\"linux\"",
+	//           "r": "FiAWGBogHB4"
+	//          }
+	//         },
+	//         "r": "Li4"
+	//        },
+	//        "r": "EhQSFA"
+	//       },
+	//       {
+	//        "h": [
+	//         "process",
+	//         "web"
+	//        ],
+	//        "b": {
+	//         "a": {
+	//          "exec": {
+	//           "s": "[\"./webapp\"]",
+	//           "r": "OEA4OjxAPD4"
+	//          }
+	//         },
+	//         "r": "QkI"
+	//        },
+	//        "r": "MDYwMjQ2"
+	//       },
+	//       {
+	//        "h": [
+	//         "process",
+	//         "worker"
+	//        ],
+	//        "b": {
+	//         "a": {
+	//          "exec": {
+	//           "s": "[\"./worker\"]",
+	//           "r": "TFRMTlBUUFI"
+	//          }
+	//         },
+	//         "r": "VlY"
+	//        },
+	//        "r": "REpERkhK"
+	//       }
+	//      ],
+	//      "r": "WFg"
+	//     },
+	//     "r": "AggCBAYI"
+	//    }
+	//   ],
+	//   "r": "Wlo"
+	//  },
+	//  "s": [
+	//   "example.svc"
+	//  ],
+	//  "p": "BAQEAA4OAAICABISAggMABAQAAYGAAICAggIABAQAgYKAAQEAAoKAAICAAoKAAICAgYGAAgIAAYGAAICAAoKAAICAgoKAggIAA4OAAICAAoKAgwQAAgIAAYGAAICABYWAgoKAggIAA4OAAICABAQAgwQAAgIAAYGAAICABYWAgoKAgYGAgQE"
+	// }
+}
--- a/hclpack/expression.go
+++ b/hclpack/expression.go
@ -87,6 +87,11 @@ func (e *Expression) Parse() (hcl.Expression, hcl.Diagnostics) {
 	}
 }

+// addRanges records the expression's two source ranges (Range_ and
+// StartRange_) as keys of the given set.
+func (e *Expression) addRanges(rngs map[hcl.Range]struct{}) {
+	for _, rng := range [...]hcl.Range{e.Range_, e.StartRange_} {
+		rngs[rng] = struct{}{}
+	}
+}
+
 // ExprSourceType defines the syntax type used for an expression's source code,
 // which is then used to select a suitable parser for it when evaluating.
 type ExprSourceType rune
--- a/hclpack/json_marshal.go
+++ b/hclpack/json_marshal.go
@ -1,12 +1,86 @@
 package hclpack

+import (
+	"encoding/json"
+
+	"github.com/hashicorp/hcl2/hcl"
+)
+
 // MarshalJSON is an implementation of Marshaler from encoding/json, allowing
 // bodies to be included in other types that are JSON-marshalable.
 //
 // The result of MarshalJSON is optimized for compactness rather than easy
 // human consumption/editing. Use UnmarshalJSON to decode it.
 func (b *Body) MarshalJSON() ([]byte, error) {
-	return nil, nil
+	// Gather every source range reachable from this body first, so that the
+	// positions can be packed once and then referenced by offset from each
+	// encoded range.
+	allRanges := make(map[hcl.Range]struct{})
+	b.addRanges(allRanges)
+
+	filenames, positions, offsets := packPositions(allRanges)
+
+	return json.Marshal(&jsonHeader{
+		Body:    b.forJSON(offsets),
+		Sources: filenames,
+		Pos:     positions,
+	})
+}
+
+// forJSON converts the body into its JSON-serializable form, translating
+// each source range into packed offsets via the given position lookup table.
+//
+// Attrs and Blocks are left nil when the body has no attributes or child
+// blocks respectively; Ranges always holds exactly one entry, the packed
+// MissingItemRange_.
+func (b *Body) forJSON(pos map[string]map[hcl.Pos]posOfs) bodyJSON {
+	out := bodyJSON{
+		Ranges: rangesPacked{packRange(b.MissingItemRange_, pos)},
+	}
+
+	if n := len(b.Attributes); n > 0 {
+		out.Attrs = make(map[string]attrJSON, n)
+		for name, attr := range b.Attributes {
+			out.Attrs[name] = attr.forJSON(pos)
+		}
+	}
+
+	if n := len(b.ChildBlocks); n > 0 {
+		out.Blocks = make([]blockJSON, 0, n)
+		for i := range b.ChildBlocks {
+			out.Blocks = append(out.Blocks, b.ChildBlocks[i].forJSON(pos))
+		}
+	}
+
+	return out
+}
+
+// forJSON converts the attribute, flattened together with its expression,
+// into its JSON-serializable form.
+//
+// Ranges holds four entries in order: the attribute's Range and NameRange,
+// then the expression's Range_ and StartRange_.
+func (a *Attribute) forJSON(pos map[string]map[hcl.Pos]posOfs) attrJSON {
+	out := attrJSON{
+		Source: string(a.Expr.Source),
+		Ranges: rangesPacked{
+			packRange(a.Range, pos),
+			packRange(a.NameRange, pos),
+			packRange(a.Expr.Range_, pos),
+			packRange(a.Expr.StartRange_, pos),
+		},
+	}
+
+	// Syntax is 1 for template expressions. Native expressions (and any
+	// other source type) keep the zero value.
+	if a.Expr.SourceType == ExprTemplate {
+		out.Syntax = 1
+	}
+
+	return out
+}
+
+// forJSON converts the block into its JSON-serializable form.
+//
+// Header is the block type followed by its labels. Ranges is the DefRange,
+// the TypeRange and then one entry per label range, in order.
+func (b *Block) forJSON(pos map[string]map[hcl.Pos]posOfs) blockJSON {
+	header := make([]string, 0, len(b.Labels)+1)
+	header = append(header, b.Type)
+	header = append(header, b.Labels...)
+
+	ranges := make(rangesPacked, 0, 2+len(b.LabelRanges))
+	ranges = append(ranges, packRange(b.DefRange, pos), packRange(b.TypeRange, pos))
+	for _, rng := range b.LabelRanges {
+		ranges = append(ranges, packRange(rng, pos))
+	}
+
+	return blockJSON{
+		Header: header,
+		Body:   b.Body.forJSON(pos),
+		Ranges: ranges,
+	}
 }

 // UnmarshalJSON is an implementation of Unmarshaler from encoding/json,
@ -16,7 +90,7 @@ func (b *Body) UnmarshalJSON([]byte) error {
 }

 type jsonHeader struct {
-	Body bodyJSON `json:"b"`
+	Body bodyJSON `json:"r"`

 	Sources []string        `json:"s,omitempty"`
 	Pos     positionsPacked `json:"p,omitempty"`
@ -24,31 +98,31 @@ type jsonHeader struct {

+// bodyJSON is the JSON representation of a Body.
 type bodyJSON struct {
-	// Files are the source filenames that were involved in
-	Attrs            map[string]attrJSON `json:"a,omitempty"`
-	Blocks           []blockJSON         `json:"b,omitempty"`
-	MissingItemRange rangePacked         `json:"r,omitempty"`
+	// Attrs holds the body's attributes keyed by name and Blocks holds its
+	// child blocks. Both are omitted from the output when empty.
+	Attrs  map[string]attrJSON `json:"a,omitempty"`
+	Blocks []blockJSON         `json:"b,omitempty"`
+
+	// Ranges contains the MissingItemRange
+	Ranges rangesPacked `json:"r,omitempty"`
 }

+// attrJSON is the JSON representation of an Attribute together with its
+// expression.
 type attrJSON struct {
-	Expr exprJSON `json:"e"`
+	// To keep things compact, in the JSON encoding we flatten the
+	// expression down into the attribute object, since overhead
+	// for attributes adds up in a complex config.
+	//
+	// Source is the expression's source code. Syntax identifies the
+	// expression's source type: 0 for native syntax, 1 for template syntax.
+	Source string `json:"s"`
+	Syntax int    `json:"t,omitempty"` // omitted for 0=native

-	// Ranges contains the full range followed by the name range
+	// Ranges contains the Range, NameRange, Expr.Range, Expr.StartRange
 	Ranges rangesPacked `json:"r,omitempty"`
 }

+// blockJSON is the JSON representation of a Block.
 type blockJSON struct {
-	Type   string   `json:"t"`
-	Labels []string `json:"l,omitempty"`
+	// Header is the type followed by any labels. We flatten this here
+	// to keep the JSON encoding compact.
+	Header []string `json:"h"`
+	// NOTE(review): encoding/json's omitempty does not omit struct values,
+	// so the option on Body below appears to have no effect — confirm intent.
 	Body   bodyJSON `json:"b,omitempty"`

-	// Ranges contains the DefRange followed by the TypeRange
-	Ranges rangesPacked `json:"r,omitempty"`
-}
-
-type exprJSON struct {
-	Source string `json:"s"`
-	Syntax string `json:"t"`
-
-	// Ranges contains the Range followed by the StartRange
+	// Ranges contains the DefRange followed by the TypeRange and then
+	// each of the label ranges in turn.
 	Ranges rangesPacked `json:"r,omitempty"`
 }
--- a/hclpack/json_marshal_test.go
+++ b/hclpack/json_marshal_test.go
@ -0,0 +1,9 @@
+package hclpack
+
+import (
+	"testing"
+)
+
+// TestJSONRoundTrip is currently an empty placeholder: it makes no
+// assertions and therefore always passes.
+//
+// TODO: presumably to be filled in once JSON unmarshaling is implemented,
+// so that a marshal/unmarshal round trip can actually be checked.
+func TestJSONRoundTrip(t *testing.T) {
+
+}
--- a/hclpack/positions_packed.go
+++ b/hclpack/positions_packed.go
@ -1,5 +1,12 @@
 package hclpack

+import (
+	"encoding/base64"
+	"sort"
+
+	"github.com/hashicorp/hcl2/hcl"
+)
+
 // positionsPacked is a delta-based representation of source positions
 // that implements encoding.TextMarshaler and encoding.TextUnmarshaler using
 // a compact variable-length quantity encoding to mimimize the overhead of
@ -9,6 +16,71 @@ package hclpack
 // in a positionsPacked by index.
 type positionsPacked []positionPacked

+// MarshalBinary encodes the positions as a sequence of variable-length
+// integers: the line, column and byte deltas of each position in turn.
+//
+// File indices are not written as integers. Instead a raw ';' byte is
+// emitted once for each step by which the file index increases from one
+// position to the next. The returned error is always nil.
+func (pp positionsPacked) MarshalBinary() ([]byte, error) {
+	lenInt := len(pp) * 3 // three ints are written per position; the file index is encoded via delimiters instead
+
+	// guess avg of ~1.25 bytes per int, in which case we'll avoid further allocation
+	buf := newVLQBuf(lenInt + (lenInt / 4))
+	var lastFileIdx int
+	for _, ppr := range pp {
+		// Rather than writing out the same file index over and over, we instead
+		// insert a ; delimiter each time it increases. Since it's common
+		// for a body to be entirely in one file, this can lead to considerable
+		// savings in that case.
+		//
+		// lastFileIdx must advance along with the delimiters; otherwise every
+		// position in a later file would repeat the delimiters for its file.
+		for ; lastFileIdx < ppr.FileIdx; lastFileIdx++ {
+			buf = buf.AppendRawByte(';')
+		}
+		buf = buf.AppendInt(ppr.LineDelta)
+		buf = buf.AppendInt(ppr.ColumnDelta)
+		buf = buf.AppendInt(ppr.ByteDelta)
+	}
+
+	return buf.Bytes(), nil
+}
+
+// MarshalText returns the MarshalBinary encoding of the positions as
+// unpadded standard base64 text.
+func (pp positionsPacked) MarshalText() ([]byte, error) {
+	bin, err := pp.MarshalBinary()
+	if err != nil {
+		return nil, err
+	}
+
+	enc := base64.RawStdEncoding
+	text := make([]byte, enc.EncodedLen(len(bin)))
+	enc.Encode(text, bin)
+	return text, nil
+}
+
+// position is an unpacked source position, as produced by
+// positionsPacked.Unpack: Pos is the absolute position and FileIdx is the
+// file index carried over from the packed form. rangePacked.Unpack uses
+// FileIdx to index its filename slice.
+type position struct {
+	FileIdx int
+	Pos     hcl.Pos
+}
+
+// Unpack converts the delta-encoded positions back into absolute positions
+// by accumulating the deltas in order.
+//
+// The accumulator starts from the zero hcl.Pos and is reset to it whenever
+// the file index changes. The column is reset to zero before applying any
+// delta that moves forward to a later line.
+func (pp positionsPacked) Unpack() []position {
+	out := make([]position, 0, len(pp))
+
+	var (
+		cur     hcl.Pos
+		curFile int
+	)
+	for _, delta := range pp {
+		if delta.FileIdx != curFile {
+			// reset base position for each new file
+			cur, curFile = hcl.Pos{}, delta.FileIdx
+		}
+		if delta.LineDelta > 0 {
+			// reset column position for each new line
+			cur.Column = 0
+		}
+		cur.Line += delta.LineDelta
+		cur.Column += delta.ColumnDelta
+		cur.Byte += delta.ByteDelta
+
+		out = append(out, position{FileIdx: delta.FileIdx, Pos: cur})
+	}
+
+	return out
+}
+
 type positionPacked struct {
 	FileIdx                           int
 	LineDelta, ColumnDelta, ByteDelta int
@ -46,6 +118,132 @@ type rangePacked struct {
 	End   posOfs
 }

+// packRange looks up the packed offsets of the start and end of rng in the
+// per-filename position table. A position that is absent from the table
+// yields the zero posOfs.
+func packRange(rng hcl.Range, pos map[string]map[hcl.Pos]posOfs) rangePacked {
+	byPos := pos[rng.Filename]
+
+	var packed rangePacked
+	packed.Start = byPos[rng.Start]
+	packed.End = byPos[rng.End]
+	return packed
+}
+
+// Unpack resolves the packed range against the given unpacked positions and
+// filenames, returning the corresponding hcl.Range.
+//
+// If either the start or the end offset does not refer to an element of
+// poss, the zero hcl.Range is returned. The filename is taken from the file
+// index of the start position and is left empty if that index is not valid
+// for fns.
+func (rp rangePacked) Unpack(fns []string, poss []position) hcl.Range {
+	startIdx := rp.Start.Index()
+	endIdx := rp.End.Index()
+	// Either condition alone makes the index unusable, so these must be
+	// combined with ||; with && the check could never be true.
+	if startIdx < 0 || startIdx >= len(poss) {
+		return hcl.Range{} // out of bounds, so invalid
+	}
+	if endIdx < 0 || endIdx >= len(poss) {
+		return hcl.Range{} // out of bounds, so invalid
+	}
+	startPos := poss[startIdx]
+	endPos := poss[endIdx]
+	fnIdx := startPos.FileIdx
+	var fn string
+	if fnIdx >= 0 && fnIdx < len(fns) {
+		fn = fns[fnIdx]
+	}
+	return hcl.Range{
+		Filename: fn,
+		Start:    startPos.Pos,
+		End:      endPos.Pos,
+	}
+}
+
 // rangesPacked represents a sequence of ranges, packed compactly into a single
 // string during marshaling.
 type rangesPacked []rangePacked
+
+// MarshalBinary encodes the ranges as a sequence of variable-length
+// integers: the start offset and then the end offset of each range in turn.
+// The returned error is always nil.
+func (rp rangesPacked) MarshalBinary() ([]byte, error) {
+	intCount := 2 * len(rp) // each rangePacked contributes a start and an end offset
+
+	// guess avg of ~1.25 bytes per int, in which case we'll avoid further allocation
+	buf := newVLQBuf(intCount + intCount/4)
+	for i := range rp {
+		// intentionally storing these as 1-based offsets
+		buf = buf.AppendInt(int(rp[i].Start)).AppendInt(int(rp[i].End))
+	}
+
+	return buf.Bytes(), nil
+}
+
+// MarshalText returns the MarshalBinary encoding of the ranges as unpadded
+// standard base64 text.
+func (rp rangesPacked) MarshalText() ([]byte, error) {
+	bin, err := rp.MarshalBinary()
+	if err != nil {
+		return nil, err
+	}
+
+	enc := base64.RawStdEncoding
+	text := make([]byte, enc.EncodedLen(len(bin)))
+	enc.Encode(text, bin)
+	return text, nil
+}
+
+// UnpackIdx unpacks the range at position idx of the sequence. It returns
+// the zero hcl.Range if idx is outside the sequence.
+func (rps rangesPacked) UnpackIdx(fns []string, poss []position, idx int) hcl.Range {
+	if idx >= 0 && idx < len(rps) {
+		return rps[idx].Unpack(fns, poss)
+	}
+	return hcl.Range{} // out of bounds, so invalid
+}
+
+// packPositions collects the distinct start and end positions of the given
+// ranges and delta-encodes them into a positionsPacked.
+//
+// It returns the sorted list of filenames involved, the packed positions
+// (grouped by file in filename order and, within a file, ordered by byte
+// offset), and a lookup table from filename and position to the encoded
+// offset of that position.
+func packPositions(rngs map[hcl.Range]struct{}) (fns []string, poss positionsPacked, posMap map[string]map[hcl.Pos]posOfs) {
+	posByFile := make(map[string][]hcl.Pos)
+	for rng := range rngs {
+		posByFile[rng.Filename] = append(posByFile[rng.Filename], rng.Start, rng.End)
+	}
+
+	fns = make([]string, 0, len(posByFile))
+	for fn := range posByFile {
+		fns = append(fns, fn)
+	}
+	sort.Strings(fns)
+
+	var packed positionsPacked
+	posMap = make(map[string]map[hcl.Pos]posOfs)
+	for fileIdx, fn := range fns {
+		filePoss := posByFile[fn]
+		sort.Sort(sortPositions(filePoss))
+
+		// Every file listed in fns has at least one position, so the
+		// per-file table can be created up front.
+		offsets := make(map[hcl.Pos]posOfs)
+		posMap[fn] = offsets
+
+		// Deltas restart from the zero position for each file.
+		var prev hcl.Pos
+		for _, pos := range filePoss {
+			if _, seen := offsets[pos]; seen {
+				continue
+			}
+			if pos.Line != prev.Line {
+				// Column indices start from zero for each new line.
+				prev.Column = 0
+			}
+			// The offset is derived from the index this position is about
+			// to occupy, so it must be computed before appending.
+			offsets[pos] = newPosOffs(len(packed))
+			packed = append(packed, positionPacked{
+				FileIdx:     fileIdx,
+				LineDelta:   pos.Line - prev.Line,
+				ColumnDelta: pos.Column - prev.Column,
+				ByteDelta:   pos.Byte - prev.Byte,
+			})
+			prev = pos
+		}
+	}
+
+	return fns, packed, posMap
+}
+
+// sortPositions implements sort.Interface over a slice of positions,
+// ordering them by byte offset only.
+type sortPositions []hcl.Pos
+
+// Len returns the number of positions.
+func (sp sortPositions) Len() int { return len(sp) }
+
+// Less reports whether position i has a smaller byte offset than position j.
+func (sp sortPositions) Less(i, j int) bool {
+	a, b := sp[i], sp[j]
+	return a.Byte < b.Byte
+}
+
+// Swap exchanges positions i and j.
+func (sp sortPositions) Swap(i, j int) {
+	tmp := sp[i]
+	sp[i] = sp[j]
+	sp[j] = tmp
+}
--- a/hclpack/structure.go
+++ b/hclpack/structure.go
@ -226,6 +226,16 @@ func (b *Body) appendBlock(block Block) {
 	b.ChildBlocks = append(b.ChildBlocks, block)
 }

+// addRanges records the body's MissingItemRange_ in the given set and then
+// recursively does the same for all of its attributes and child blocks.
+func (b *Body) addRanges(rngs map[hcl.Range]struct{}) {
+	rngs[b.MissingItemRange_] = struct{}{}
+
+	for name := range b.Attributes {
+		attr := b.Attributes[name]
+		attr.addRanges(rngs)
+	}
+	for i := range b.ChildBlocks {
+		b.ChildBlocks[i].addRanges(rngs)
+	}
+}
+
 // Block represents a nested block within a body.
 type Block struct {
 	Type   string
@ -248,6 +258,15 @@ func (b *Block) asHCLBlock() *hcl.Block {
 	}
 }

+// addRanges records the block's DefRange, TypeRange and each of its label
+// ranges in the given set, then recurses into the block's body.
+func (b *Block) addRanges(rngs map[hcl.Range]struct{}) {
+	var present struct{}
+
+	rngs[b.DefRange] = present
+	rngs[b.TypeRange] = present
+	for i := range b.LabelRanges {
+		rngs[b.LabelRanges[i]] = present
+	}
+
+	b.Body.addRanges(rngs)
+}
+
 // Attribute represents an attribute definition within a body.
 type Attribute struct {
 	Expr Expression
@ -263,3 +282,9 @@ func (a *Attribute) asHCLAttribute(name string) *hcl.Attribute {
 		NameRange: a.NameRange,
 	}
 }
+
+// addRanges records the attribute's Range and NameRange in the given set,
+// followed by the ranges of its expression.
+func (a *Attribute) addRanges(rngs map[hcl.Range]struct{}) {
+	for _, rng := range [...]hcl.Range{a.Range, a.NameRange} {
+		rngs[rng] = struct{}{}
+	}
+	a.Expr.addRanges(rngs)
+}
--- a/hclpack/vlq.go
+++ b/hclpack/vlq.go
@ -0,0 +1,33 @@
+package hclpack
+
+import (
+	"github.com/bsm/go-vlq"
+)
+
+// vlqBuf is a byte buffer to which variable-length-quantity encoded integers
+// and raw bytes can be appended. Each append method returns the updated
+// buffer, in the same manner as the built-in append.
+type vlqBuf []byte
+
+// vlqSpace is a block of vlq.MaxLen64 zero bytes. AppendInt appends it to a
+// buffer purely to force the buffer's capacity to grow.
+var vlqSpace [vlq.MaxLen64]byte
+
+// newVLQBuf returns an empty buffer with the given initial capacity in bytes.
+func newVLQBuf(byteCap int) vlqBuf {
+	buf := make(vlqBuf, 0, byteCap)
+	return buf
+}
+
+// AppendInt appends the VLQ encoding of i and returns the extended buffer.
+func (b vlqBuf) AppendInt(i int) vlqBuf {
+	if cap(b)-len(b) < len(vlqSpace) {
+		// Not enough spare capacity for a maximum-length encoding: append
+		// the scratch bytes to grow the backing array, then trim back to
+		// the original length.
+		origLen := len(b)
+		b = append(b, vlqSpace[:]...)[:origLen]
+	}
+
+	// Encode into the spare capacity after the current contents and extend
+	// the length by the value PutInt returns (presumably the number of
+	// bytes it wrote).
+	written := vlq.PutInt(b[len(b):cap(b)], int64(i))
+	return b[:len(b)+written]
+}
+
+// AppendRawByte appends a single byte as-is, without VLQ encoding.
+func (b vlqBuf) AppendRawByte(by byte) vlqBuf {
+	b = append(b, by)
+	return b
+}
+
+// Bytes returns the buffer's contents as a plain byte slice, without copying.
+func (b vlqBuf) Bytes() []byte {
+	return b
+}