fee90926da
HCL uses grapheme cluster segmentation to produce accurate "column" indications in diagnostic messages and other human-oriented source location information. Each new major version of Unicode introduces new codepoints, some of which are defined to combine with other codepoints to produce a single visible character (grapheme cluster). We were previously using the rules from Unicode 9.0.0. This change switches to using the segmentation rules from Unicode 12.0.0, which is the latest version at the time of this commit and is also the version of Unicode used for other purposes by the Go 1.14 runtime. HCL does not use text segmentation results for any purpose that would affect the meaning of decoded data extracted from HCL files, so this change will only affect the human-oriented source positions generated for files containing characters that were newly-introduced in Unicode 10, 11, or 12. (Machine-oriented uses of source location information are based on byte offsets and not affected by text segmentation.)
123 lines
3.3 KiB
Go
123 lines
3.3 KiB
Go
package hclwrite
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
|
|
"github.com/apparentlymart/go-textseg/v12/textseg"
|
|
"github.com/hashicorp/hcl/v2"
|
|
"github.com/hashicorp/hcl/v2/hclsyntax"
|
|
)
|
|
|
|
// Token is a single sequence of bytes annotated with a type. It is similar
|
|
// in purpose to hclsyntax.Token, but discards the source position information
|
|
// since that is not useful in code generation.
|
|
type Token struct {
|
|
Type hclsyntax.TokenType
|
|
Bytes []byte
|
|
|
|
// We record the number of spaces before each token so that we can
|
|
// reproduce the exact layout of the original file when we're making
|
|
// surgical changes in-place. When _new_ code is created it will always
|
|
// be in the canonical style, but we preserve layout of existing code.
|
|
SpacesBefore int
|
|
}
|
|
|
|
// asHCLSyntax returns the receiver expressed as an incomplete hclsyntax.Token.
|
|
// A complete token is not possible since we don't have source location
|
|
// information here, and so this method is unexported so we can be sure it will
|
|
// only be used for internal purposes where we know the range isn't important.
|
|
//
|
|
// This is primarily intended to allow us to re-use certain functionality from
|
|
// hclsyntax rather than re-implementing it against our own token type here.
|
|
func (t *Token) asHCLSyntax() hclsyntax.Token {
|
|
return hclsyntax.Token{
|
|
Type: t.Type,
|
|
Bytes: t.Bytes,
|
|
Range: hcl.Range{
|
|
Filename: "<invalid>",
|
|
},
|
|
}
|
|
}
|
|
|
|
// Tokens is a flat list of tokens.
|
|
type Tokens []*Token
|
|
|
|
func (ts Tokens) Bytes() []byte {
|
|
buf := &bytes.Buffer{}
|
|
ts.WriteTo(buf)
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (ts Tokens) testValue() string {
|
|
return string(ts.Bytes())
|
|
}
|
|
|
|
// Columns returns the number of columns (grapheme clusters) the token sequence
|
|
// occupies. The result is not meaningful if there are newline or single-line
|
|
// comment tokens in the sequence.
|
|
func (ts Tokens) Columns() int {
|
|
ret := 0
|
|
for _, token := range ts {
|
|
ret += token.SpacesBefore // spaces are always worth one column each
|
|
ct, _ := textseg.TokenCount(token.Bytes, textseg.ScanGraphemeClusters)
|
|
ret += ct
|
|
}
|
|
return ret
|
|
}
|
|
|
|
// WriteTo takes an io.Writer and writes the bytes for each token to it,
|
|
// along with the spacing that separates each token. In other words, this
|
|
// allows serializing the tokens to a file or other such byte stream.
|
|
func (ts Tokens) WriteTo(wr io.Writer) (int64, error) {
|
|
// We know we're going to be writing a lot of small chunks of repeated
|
|
// space characters, so we'll prepare a buffer of these that we can
|
|
// easily pass to wr.Write without any further allocation.
|
|
spaces := make([]byte, 40)
|
|
for i := range spaces {
|
|
spaces[i] = ' '
|
|
}
|
|
|
|
var n int64
|
|
var err error
|
|
for _, token := range ts {
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
|
|
for spacesBefore := token.SpacesBefore; spacesBefore > 0; spacesBefore -= len(spaces) {
|
|
thisChunk := spacesBefore
|
|
if thisChunk > len(spaces) {
|
|
thisChunk = len(spaces)
|
|
}
|
|
var thisN int
|
|
thisN, err = wr.Write(spaces[:thisChunk])
|
|
n += int64(thisN)
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
}
|
|
|
|
var thisN int
|
|
thisN, err = wr.Write(token.Bytes)
|
|
n += int64(thisN)
|
|
}
|
|
|
|
return n, err
|
|
}
|
|
|
|
func (ts Tokens) walkChildNodes(w internalWalkFunc) {
|
|
// Unstructured tokens have no child nodes
|
|
}
|
|
|
|
func (ts Tokens) BuildTokens(to Tokens) Tokens {
|
|
return append(to, ts...)
|
|
}
|
|
|
|
func newIdentToken(name string) *Token {
|
|
return &Token{
|
|
Type: hclsyntax.TokenIdent,
|
|
Bytes: []byte(name),
|
|
}
|
|
}
|