package hcl

import (
	"bufio"
	"bytes"

	"github.com/apparentlymart/go-textseg/v13/textseg"
)

// RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc
// and visit a source range for each token matched.
//
// For example, this can be used with bufio.ScanLines to find the source range
// for each line in the file, skipping over the actual newline characters, which
// may be useful when printing source code snippets as part of diagnostic
// messages.
//
// The line and column information in the returned ranges is produced by
// counting newline characters and grapheme clusters respectively, which
// mimics the behavior we expect from a parser when producing ranges.
type RangeScanner struct {
	filename string
	b        []byte
	cb       bufio.SplitFunc

	pos Pos    // position of next byte to process in b
	cur Range  // latest range
	tok []byte // slice of b that is covered by cur
	err error  // error from last scan, if any
}

// NewRangeScanner creates a new RangeScanner for the given buffer, producing
// ranges for the given filename.
//
// Since ranges have grapheme-cluster granularity rather than byte granularity,
// the scanner will produce incorrect results if the given SplitFunc creates
// tokens between grapheme cluster boundaries. In particular, it is incorrect
// to use RangeScanner with bufio.ScanRunes because it will produce tokens
// around individual UTF-8 sequences, which will split any multi-sequence
// grapheme clusters.
func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner {
	return NewRangeScannerFragment(b, filename, InitialPos, cb)
}

// NewRangeScannerFragment is like NewRangeScanner but the ranges it produces
// will be offset by the given starting position, which is appropriate for
// sub-slices of a file, whereas NewRangeScanner assumes it is scanning an
// entire file.
func NewRangeScannerFragment(b []byte, filename string, start Pos, cb bufio.SplitFunc) *RangeScanner {
	return &RangeScanner{
		filename: filename,
		b:        b,
		cb:       cb,
		pos:      start,
	}
}

func (sc *RangeScanner) Scan() bool {
	if sc.pos.Byte >= len(sc.b) || sc.err != nil {
		// All done
		return false
	}

	// Since we're operating on an in-memory buffer, we always pass the whole
	// remainder of the buffer to our SplitFunc and set isEOF to let it know
	// that it has the whole thing.
	advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true)

	// Since we are setting isEOF to true this should never happen, but
	// if it does we will just abort and assume the SplitFunc is misbehaving.
	if advance == 0 && token == nil && err == nil {
		return false
	}

	if err != nil {
		sc.err = err
		sc.cur = Range{
			Filename: sc.filename,
			Start:    sc.pos,
			End:      sc.pos,
		}
		sc.tok = nil
		return false
	}

	sc.tok = token

	start := sc.pos
	end := sc.pos
	new := sc.pos

	// adv is similar to token but it also covers any subsequent bytes that
	// the SplitFunc is asking us to skip over, as indicated by advance.
	adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance]

	// We now need to scan over our token to count the grapheme clusters
	// so we can correctly advance Column, and count the newlines so we
	// can correctly advance Line.
	advR := bytes.NewReader(adv)
	gsc := bufio.NewScanner(advR)
	advanced := 0
	gsc.Split(textseg.ScanGraphemeClusters)
	for gsc.Scan() {
		gr := gsc.Bytes()
		new.Byte += len(gr)
		new.Column++

		// We rely here on the fact that \r\n is considered a grapheme cluster
		// and so we don't need to worry about miscounting additional lines
		// on files with Windows-style line endings.
		if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') {
			new.Column = 1
			new.Line++
		}

		if advanced < len(token) {
			// If we've not yet found the end of our token then we'll
			// also push our "end" marker along.
			// (if advance > len(token) then we'll stop moving "end" early
			// so that the caller only sees the range covered by token.)
			end = new
		}
		advanced += len(gr)
	}

	sc.cur = Range{
		Filename: sc.filename,
		Start:    start,
		End:      end,
	}

	sc.pos = new

	return true
}

// Range returns a range that covers the latest token obtained after a call
// to Scan returns true.
func (sc *RangeScanner) Range() Range {
	return sc.cur
}

// Bytes returns the slice of the input buffer that is covered by the range
// that would be returned by Range.
func (sc *RangeScanner) Bytes() []byte {
	return sc.tok
}

// Err can be called after Scan returns false to determine if the latest read
// resulted in an error, and obtain that error if so.
func (sc *RangeScanner) Err() error {
	return sc.err
}
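
// The function below is an illustrative sketch, not part of the upstream API:
// scanLineRanges is a hypothetical helper showing how RangeScanner can be used
// with bufio.ScanLines, as described in the RangeScanner doc comment, to
// collect the source range of each line in a buffer.
func scanLineRanges(src []byte, filename string) ([]Range, error) {
	sc := NewRangeScanner(src, filename, bufio.ScanLines)

	var ranges []Range
	for sc.Scan() {
		// Each range covers one line's content; the newline characters that
		// bufio.ScanLines skips over are not included in the range.
		ranges = append(ranges, sc.Range())
	}

	// Err reports any error the SplitFunc returned while scanning.
	return ranges, sc.Err()
}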