|
|
|
package hcl
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
|
|
|
|
"github.com/apparentlymart/go-textseg/textseg"
|
|
|
|
)
|
|
|
|
|
|
|
|
// RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc
// and visit a source range for each token matched.
//
// For example, this can be used with bufio.ScanLines to find the source range
// for each line in the file, skipping over the actual newline characters, which
// may be useful when printing source code snippets as part of diagnostic
// messages.
//
// The line and column information in the returned ranges is produced by
// counting newline characters and grapheme clusters respectively, which
// mimics the behavior we expect from a parser when producing ranges.
type RangeScanner struct {
	filename string          // reported in the Filename field of each produced Range
	b        []byte          // the full buffer being scanned
	cb       bufio.SplitFunc // token-splitting function that delimits each range

	pos Pos   // position of next byte to process in b
	cur Range // latest range
	tok []byte // slice of b that is covered by cur
	err error  // error from last scan, if any
}
|
|
|
|
|
2019-04-12 22:16:41 +00:00
|
|
|
// NewRangeScanner creates a new RangeScanner for the given buffer, producing
|
|
|
|
// ranges for the given filename.
|
2018-01-14 19:24:19 +00:00
|
|
|
//
|
|
|
|
// Since ranges have grapheme-cluster granularity rather than byte granularity,
|
|
|
|
// the scanner will produce incorrect results if the given SplitFunc creates
|
|
|
|
// tokens between grapheme cluster boundaries. In particular, it is incorrect
|
|
|
|
// to use RangeScanner with bufio.ScanRunes because it will produce tokens
|
|
|
|
// around individual UTF-8 sequences, which will split any multi-sequence
|
|
|
|
// grapheme clusters.
|
|
|
|
func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner {
|
2019-04-12 22:16:41 +00:00
|
|
|
return NewRangeScannerFragment(b, filename, InitialPos, cb)
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewRangeScannerFragment is like NewRangeScanner but the ranges it produces
|
|
|
|
// will be offset by the given starting position, which is appropriate for
|
|
|
|
// sub-slices of a file, whereas NewRangeScanner assumes it is scanning an
|
|
|
|
// entire file.
|
|
|
|
func NewRangeScannerFragment(b []byte, filename string, start Pos, cb bufio.SplitFunc) *RangeScanner {
|
2018-01-14 19:24:19 +00:00
|
|
|
return &RangeScanner{
|
|
|
|
filename: filename,
|
|
|
|
b: b,
|
|
|
|
cb: cb,
|
2019-04-12 22:16:41 +00:00
|
|
|
pos: start,
|
2018-01-14 19:24:19 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sc *RangeScanner) Scan() bool {
|
|
|
|
if sc.pos.Byte >= len(sc.b) || sc.err != nil {
|
|
|
|
// All done
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Since we're operating on an in-memory buffer, we always pass the whole
|
|
|
|
// remainder of the buffer to our SplitFunc and set isEOF to let it know
|
|
|
|
// that it has the whole thing.
|
|
|
|
advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true)
|
|
|
|
|
|
|
|
// Since we are setting isEOF to true this should never happen, but
|
|
|
|
// if it does we will just abort and assume the SplitFunc is misbehaving.
|
|
|
|
if advance == 0 && token == nil && err == nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
sc.err = err
|
|
|
|
sc.cur = Range{
|
|
|
|
Filename: sc.filename,
|
|
|
|
Start: sc.pos,
|
|
|
|
End: sc.pos,
|
|
|
|
}
|
|
|
|
sc.tok = nil
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
sc.tok = token
|
|
|
|
start := sc.pos
|
|
|
|
end := sc.pos
|
|
|
|
new := sc.pos
|
|
|
|
|
|
|
|
// adv is similar to token but it also includes any subsequent characters
|
|
|
|
// we're being asked to skip over by the SplitFunc.
|
|
|
|
// adv is a slice covering any additional bytes we are skipping over, based
|
|
|
|
// on what the SplitFunc told us to do with advance.
|
|
|
|
adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance]
|
|
|
|
|
|
|
|
// We now need to scan over our token to count the grapheme clusters
|
|
|
|
// so we can correctly advance Column, and count the newlines so we
|
|
|
|
// can correctly advance Line.
|
|
|
|
advR := bytes.NewReader(adv)
|
|
|
|
gsc := bufio.NewScanner(advR)
|
|
|
|
advanced := 0
|
|
|
|
gsc.Split(textseg.ScanGraphemeClusters)
|
|
|
|
for gsc.Scan() {
|
|
|
|
gr := gsc.Bytes()
|
|
|
|
new.Byte += len(gr)
|
|
|
|
new.Column++
|
|
|
|
|
|
|
|
// We rely here on the fact that \r\n is considered a grapheme cluster
|
|
|
|
// and so we don't need to worry about miscounting additional lines
|
|
|
|
// on files with Windows-style line endings.
|
|
|
|
if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') {
|
|
|
|
new.Column = 1
|
|
|
|
new.Line++
|
|
|
|
}
|
|
|
|
|
|
|
|
if advanced < len(token) {
|
|
|
|
// If we've not yet found the end of our token then we'll
|
|
|
|
// also push our "end" marker along.
|
|
|
|
// (if advance > len(token) then we'll stop moving "end" early
|
|
|
|
// so that the caller only sees the range covered by token.)
|
|
|
|
end = new
|
|
|
|
}
|
|
|
|
advanced += len(gr)
|
|
|
|
}
|
|
|
|
|
|
|
|
sc.cur = Range{
|
|
|
|
Filename: sc.filename,
|
|
|
|
Start: start,
|
|
|
|
End: end,
|
|
|
|
}
|
|
|
|
sc.pos = new
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// Range returns a range that covers the latest token obtained after a call
|
|
|
|
// to Scan returns true.
|
|
|
|
func (sc *RangeScanner) Range() Range {
|
|
|
|
return sc.cur
|
|
|
|
}
|
|
|
|
|
|
|
|
// Bytes returns the slice of the input buffer that is covered by the range
|
|
|
|
// that would be returned by Range.
|
|
|
|
func (sc *RangeScanner) Bytes() []byte {
|
|
|
|
return sc.tok
|
|
|
|
}
|
|
|
|
|
|
|
|
// Err can be called after Scan returns false to determine if the latest read
|
|
|
|
// resulted in an error, and obtain that error if so.
|
|
|
|
func (sc *RangeScanner) Err() error {
|
|
|
|
return sc.err
|
|
|
|
}
|