scanner: change signature of Scanner

This commit is contained in:
Fatih Arslan 2015-10-06 19:53:56 +03:00
parent da40013062
commit 3631451bd2
2 changed files with 86 additions and 53 deletions

View File

@ -1,10 +1,10 @@
// Package scanner implements a scanner for HCL (HashiCorp Configuration
// Language) source text.
package scanner package scanner
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"io"
"io/ioutil"
"os" "os"
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
@ -40,37 +40,30 @@ type Scanner struct {
// ErrorCount is incremented by one for each error encountered. // ErrorCount is incremented by one for each error encountered.
ErrorCount int ErrorCount int
// Start position of most recently scanned token; set by Scan. // tokPos is the start position of most recently scanned token; set by
// Calling Init or Next invalidates the position (Line == 0). // Scan. The Filename field is always left untouched by the Scanner. If
// The Filename field is always left untouched by the Scanner. // an error is reported (via Error) and Position is invalid, the scanner is
// If an error is reported (via Error) and Position is invalid, // not inside a token.
// the scanner is not inside a token. Call Pos to obtain an error
// position in that case.
tokPos Position tokPos Position
} }
// NewScanner returns a new instance of Lexer. Even though src is an io.Reader, // NewScanner returns a new instance of Scanner.
// we fully consume the content. func NewScanner(src []byte) *Scanner {
func NewScanner(src io.Reader) (*Scanner, error) { b := bytes.NewBuffer(src)
buf, err := ioutil.ReadAll(src)
if err != nil {
return nil, err
}
b := bytes.NewBuffer(buf)
s := &Scanner{ s := &Scanner{
src: b, src: b,
srcBuf: b.Bytes(), srcBuf: src, // immutable src
} }
// srcPosition always starts with 1 // srcPosition always starts with 1
s.srcPos.Line = 1 s.srcPos.Line = 1
return s, nil return s
} }
// next reads the next rune from the buffered reader. Returns the rune(0) if // next reads the next rune from the buffered reader. Returns the rune(0) if
// an error occurs (or io.EOF is returned). // an error occurs (or io.EOF is returned).
func (s *Scanner) next() rune { func (s *Scanner) next() rune {
ch, size, err := s.src.ReadRune() ch, size, err := s.src.ReadRune()
if err != nil { if err != nil {
// advance for error reporting // advance for error reporting
@ -106,6 +99,7 @@ func (s *Scanner) next() rune {
return ch return ch
} }
// unread unreads the previously read rune and restores the previous position.
func (s *Scanner) unread() { func (s *Scanner) unread() {
if err := s.src.UnreadRune(); err != nil { if err := s.src.UnreadRune(); err != nil {
panic(err) // this is user fault, we should catch it panic(err) // this is user fault, we should catch it
@ -113,6 +107,7 @@ func (s *Scanner) unread() {
s.srcPos = s.prevPos // put back last position s.srcPos = s.prevPos // put back last position
} }
// peek returns the next rune without advancing the reader.
func (s *Scanner) peek() rune { func (s *Scanner) peek() rune {
peek, _, err := s.src.ReadRune() peek, _, err := s.src.ReadRune()
if err != nil { if err != nil {
@ -203,6 +198,26 @@ func (s *Scanner) Scan() (tok token.Token) {
return tok return tok
} }
// TokenText returns the literal text of the most recently scanned token, or
// the empty string when there is no token text (tokStart is negative).
func (s *Scanner) TokenText() string {
	if s.tokStart >= 0 {
		// Move the not-yet-buffered part of the token from srcBuf into
		// tokBuf, then advance tokStart so that a repeated call appends
		// nothing further.
		rest := s.srcBuf[s.tokStart:s.tokEnd]
		s.tokBuf.Write(rest)
		s.tokStart = s.tokEnd
		return s.tokBuf.String()
	}

	// no token text
	return ""
}
// Pos reports the position recorded in tokPos for the most recently scanned
// token.
func (s *Scanner) Pos() (pos Position) {
	pos = s.tokPos
	return pos
}
func (s *Scanner) scanComment(ch rune) { func (s *Scanner) scanComment(ch rune) {
// single line comments // single line comments
if ch == '#' || (ch == '/' && s.peek() != '*') { if ch == '#' || (ch == '/' && s.peek() != '*') {
@ -335,6 +350,7 @@ func (s *Scanner) scanMantissa(ch rune) rune {
return ch return ch
} }
// scanFraction scans the fraction after the '.' rune
func (s *Scanner) scanFraction(ch rune) rune { func (s *Scanner) scanFraction(ch rune) rune {
if ch == '.' { if ch == '.' {
ch = s.peek() // we peek just to see if we can move forward ch = s.peek() // we peek just to see if we can move forward
@ -343,6 +359,8 @@ func (s *Scanner) scanFraction(ch rune) rune {
return ch return ch
} }
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
// rune.
func (s *Scanner) scanExponent(ch rune) rune { func (s *Scanner) scanExponent(ch rune) rune {
if ch == 'e' || ch == 'E' { if ch == 'e' || ch == 'E' {
ch = s.next() ch = s.next()
@ -431,26 +449,6 @@ func (s *Scanner) scanIdentifier() string {
return string(s.srcBuf[offs:s.srcPos.Offset]) return string(s.srcBuf[offs:s.srcPos.Offset])
} }
// TokenText returns the literal text of the most recently scanned token, or
// the empty string when there is no token text (tokStart is negative).
func (s *Scanner) TokenText() string {
	if s.tokStart >= 0 {
		// Move the not-yet-buffered part of the token from srcBuf into
		// tokBuf, then advance tokStart so that a repeated call appends
		// nothing further.
		rest := s.srcBuf[s.tokStart:s.tokEnd]
		s.tokBuf.Write(rest)
		s.tokStart = s.tokEnd
		return s.tokBuf.String()
	}

	// no token text
	return ""
}
// Pos reports the position recorded in tokPos for the most recently scanned
// token.
func (s *Scanner) Pos() (pos Position) {
	pos = s.tokPos
	return pos
}
// recentPosition returns the position of the character immediately after the // recentPosition returns the position of the character immediately after the
// character or token returned by the last call to Scan. // character or token returned by the last call to Scan.
func (s *Scanner) recentPosition() (pos Position) { func (s *Scanner) recentPosition() (pos Position) {

View File

@ -3,7 +3,6 @@ package scanner
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"strings"
"testing" "testing"
"github.com/fatih/hcl/token" "github.com/fatih/hcl/token"
@ -185,10 +184,7 @@ func TestPosition(t *testing.T) {
} }
} }
s, err := NewScanner(buf) s := NewScanner(buf.Bytes())
if err != nil {
t.Fatal(err)
}
pos := Position{"", 4, 1, 5} pos := Position{"", 4, 1, 5}
s.Scan() s.Scan()
@ -246,6 +242,52 @@ func TestFloat(t *testing.T) {
testTokenList(t, tokenLists["float"]) testTokenList(t, tokenLists["float"])
} }
// TestComplexHCL is currently a placeholder: its body contains only a
// commented-out complexHCL fixture and performs no scanning or assertions.
func TestComplexHCL(t *testing.T) {
// complexHCL = `// This comes from Terraform, as a test
// variable "foo" {
// default = "bar"
// description = "bar"
// }
//
// provider "aws" {
// access_key = "foo"
// secret_key = "bar"
// }
//
// provider "do" {
// api_key = "${var.foo}"
// }
//
// resource "aws_security_group" "firewall" {
// count = 5
// }
//
// resource aws_instance "web" {
// ami = "${var.foo}"
// security_groups = [
// "foo",
// "${aws_security_group.firewall.foo}"
// ]
//
// network_interface {
// device_index = 0
// description = "Main network interface"
// }
// }
//
// resource "aws_instance" "db" {
// security_groups = "${aws_security_group.firewall.*.id}"
// VPC = "foo"
//
// depends_on = ["aws_instance.web"]
// }
//
// output "web_ip" {
// value = "${aws_instance.web.private_ip}"
// }`
}
func TestError(t *testing.T) { func TestError(t *testing.T) {
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
@ -269,10 +311,7 @@ func TestError(t *testing.T) {
} }
func testError(t *testing.T, src, pos, msg string, tok token.Token) { func testError(t *testing.T, src, pos, msg string, tok token.Token) {
s, err := NewScanner(strings.NewReader(src)) s := NewScanner([]byte(src))
if err != nil {
t.Fatal(err)
}
errorCalled := false errorCalled := false
s.Error = func(p Position, m string) { s.Error = func(p Position, m string) {
@ -307,11 +346,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) {
fmt.Fprintf(buf, "%s\n", ident.text) fmt.Fprintf(buf, "%s\n", ident.text)
} }
s, err := NewScanner(buf) s := NewScanner(buf.Bytes())
if err != nil {
t.Fatal(err)
}
for _, ident := range tokenList { for _, ident := range tokenList {
tok := s.Scan() tok := s.Scan()
if tok != ident.tok { if tok != ident.tok {