scanner: fix all errors
This commit is contained in:
parent
ea92162955
commit
62a4ab3db7
@ -46,7 +46,7 @@ type Scanner struct {
|
|||||||
// If an error is reported (via Error) and Position is invalid,
|
// If an error is reported (via Error) and Position is invalid,
|
||||||
// the scanner is not inside a token. Call Pos to obtain an error
|
// the scanner is not inside a token. Call Pos to obtain an error
|
||||||
// position in that case.
|
// position in that case.
|
||||||
tokPos Position
|
Position
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewScanner returns a new instance of Scanner. Even though src is an io.Reader,
|
// NewScanner returns a new instance of Scanner. Even though src is an io.Reader,
|
||||||
@ -65,7 +65,6 @@ func NewScanner(src io.Reader) (*Scanner, error) {
|
|||||||
|
|
||||||
// srcPosition always starts with 1
|
// srcPosition always starts with 1
|
||||||
s.srcPos.Line = 1
|
s.srcPos.Line = 1
|
||||||
|
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -74,30 +73,36 @@ func NewScanner(src io.Reader) (*Scanner, error) {
|
|||||||
func (s *Scanner) next() rune {
|
func (s *Scanner) next() rune {
|
||||||
ch, size, err := s.src.ReadRune()
|
ch, size, err := s.src.ReadRune()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// advance for error reporting
|
||||||
|
s.srcPos.Column++
|
||||||
|
s.srcPos.Offset += size
|
||||||
|
s.lastCharLen = size
|
||||||
return eof
|
return eof
|
||||||
}
|
}
|
||||||
|
|
||||||
if ch == utf8.RuneError && size == 1 {
|
if ch == utf8.RuneError && size == 1 {
|
||||||
s.srcPos.Column++
|
s.srcPos.Column++
|
||||||
s.srcPos.Offset += size
|
s.srcPos.Offset += size
|
||||||
|
s.lastCharLen = size
|
||||||
s.err("illegal UTF-8 encoding")
|
s.err("illegal UTF-8 encoding")
|
||||||
return eof
|
return ch
|
||||||
}
|
}
|
||||||
|
|
||||||
// remember last position
|
// remember last position
|
||||||
s.prevPos = s.srcPos
|
s.prevPos = s.srcPos
|
||||||
s.lastCharLen = size
|
|
||||||
|
|
||||||
s.srcPos.Offset += size
|
|
||||||
s.srcPos.Column++
|
s.srcPos.Column++
|
||||||
|
s.lastCharLen = size
|
||||||
|
s.srcPos.Offset += size
|
||||||
|
|
||||||
if ch == '\n' {
|
if ch == '\n' {
|
||||||
s.srcPos.Line++
|
s.srcPos.Line++
|
||||||
s.srcPos.Column = 0
|
|
||||||
s.lastLineLen = s.srcPos.Column
|
s.lastLineLen = s.srcPos.Column
|
||||||
|
s.srcPos.Column = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// debug
|
// debug
|
||||||
// fmt.Printf("ch: %q, off:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
|
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
|
||||||
return ch
|
return ch
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,18 +138,17 @@ func (s *Scanner) Scan() (tok token.Token) {
|
|||||||
|
|
||||||
// token position, initial next() is moving the offset by one(size of rune
|
// token position, initial next() is moving the offset by one(size of rune
|
||||||
// actually), though we are interested with the starting point
|
// actually), though we are interested with the starting point
|
||||||
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
|
s.Position.Offset = s.srcPos.Offset - s.lastCharLen
|
||||||
|
|
||||||
if s.srcPos.Column > 0 {
|
if s.srcPos.Column > 0 {
|
||||||
// common case: last character was not a '\n'
|
// common case: last character was not a '\n'
|
||||||
s.tokPos.Line = s.srcPos.Line
|
s.Position.Line = s.srcPos.Line
|
||||||
s.tokPos.Column = s.srcPos.Column
|
s.Position.Column = s.srcPos.Column
|
||||||
} else {
|
} else {
|
||||||
// last character was a '\n'
|
// last character was a '\n'
|
||||||
// (we cannot be at the beginning of the source
|
// (we cannot be at the beginning of the source
|
||||||
// since we have called next() at least once)
|
// since we have called next() at least once)
|
||||||
s.tokPos.Line = s.srcPos.Line - 1
|
s.Position.Line = s.srcPos.Line - 1
|
||||||
s.tokPos.Column = s.lastLineLen
|
s.Position.Column = s.lastLineLen
|
||||||
}
|
}
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
@ -190,6 +194,8 @@ func (s *Scanner) Scan() (tok token.Token) {
|
|||||||
tok = token.ADD
|
tok = token.ADD
|
||||||
case '-':
|
case '-':
|
||||||
tok = token.SUB
|
tok = token.SUB
|
||||||
|
default:
|
||||||
|
s.err("illegal char")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,24 +204,8 @@ func (s *Scanner) Scan() (tok token.Token) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scanner) scanComment(ch rune) {
|
func (s *Scanner) scanComment(ch rune) {
|
||||||
// look for /* - style comments
|
|
||||||
if ch == '/' && s.peek() == '*' {
|
|
||||||
for {
|
|
||||||
if ch < 0 {
|
|
||||||
s.err("comment not terminated")
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
ch0 := ch
|
|
||||||
ch = s.next()
|
|
||||||
if ch0 == '*' && ch == '/' {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// single line comments
|
// single line comments
|
||||||
if ch == '#' || ch == '/' {
|
if ch == '#' || (ch == '/' && s.peek() != '*') {
|
||||||
ch = s.next()
|
ch = s.next()
|
||||||
for ch != '\n' && ch >= 0 {
|
for ch != '\n' && ch >= 0 {
|
||||||
ch = s.next()
|
ch = s.next()
|
||||||
@ -223,6 +213,27 @@ func (s *Scanner) scanComment(ch rune) {
|
|||||||
s.unread()
|
s.unread()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// be sure we get the character after /*. This allows us to find comments
|
||||||
|
// that are not terminated
|
||||||
|
if ch == '/' {
|
||||||
|
s.next()
|
||||||
|
ch = s.next() // read character after "/*"
|
||||||
|
}
|
||||||
|
|
||||||
|
// look for /* - style comments
|
||||||
|
for {
|
||||||
|
if ch < 0 || ch == eof {
|
||||||
|
s.err("comment not terminated")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
ch0 := ch
|
||||||
|
ch = s.next()
|
||||||
|
if ch0 == '*' && ch == '/' {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// scanNumber scans a HCL number definition starting with the given rune
|
// scanNumber scans a HCL number definition starting with the given rune
|
||||||
@ -238,12 +249,15 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
|
|||||||
ch = s.next()
|
ch = s.next()
|
||||||
found = true
|
found = true
|
||||||
}
|
}
|
||||||
s.unread()
|
|
||||||
|
|
||||||
if !found {
|
if !found {
|
||||||
s.err("illegal hexadecimal number")
|
s.err("illegal hexadecimal number")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ch != eof {
|
||||||
|
s.unread()
|
||||||
|
}
|
||||||
|
|
||||||
return token.NUMBER
|
return token.NUMBER
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,9 +270,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
|
|||||||
// 0159.23 is valid. So we mark a possible illegal octal. If
|
// 0159.23 is valid. So we mark a possible illegal octal. If
|
||||||
// the next character is not a period, we'll print the error.
|
// the next character is not a period, we'll print the error.
|
||||||
illegalOctal = true
|
illegalOctal = true
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// literals of form 01e10 are treated as Numbers in HCL, which differs from Go.
|
// literals of form 01e10 are treated as Numbers in HCL, which differs from Go.
|
||||||
@ -281,7 +293,9 @@ func (s *Scanner) scanNumber(ch rune) token.Token {
|
|||||||
s.err("illegal octal number")
|
s.err("illegal octal number")
|
||||||
}
|
}
|
||||||
|
|
||||||
s.unread()
|
if ch != eof {
|
||||||
|
s.unread()
|
||||||
|
}
|
||||||
return token.NUMBER
|
return token.NUMBER
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -435,19 +449,38 @@ func (s *Scanner) TokenText() string {
|
|||||||
// Pos returns the position of the character immediately after the character or
|
// Pos returns the position of the character immediately after the character or
|
||||||
// token returned by the last call to Scan.
|
// token returned by the last call to Scan.
|
||||||
func (s *Scanner) Pos() (pos Position) {
|
func (s *Scanner) Pos() (pos Position) {
|
||||||
return s.tokPos
|
pos.Offset = s.srcPos.Offset - s.lastCharLen
|
||||||
|
switch {
|
||||||
|
case s.srcPos.Column > 0:
|
||||||
|
// common case: last character was not a '\n'
|
||||||
|
pos.Line = s.srcPos.Line
|
||||||
|
pos.Column = s.srcPos.Column
|
||||||
|
case s.lastLineLen > 0:
|
||||||
|
// last character was a '\n'
|
||||||
|
// (we cannot be at the beginning of the source
|
||||||
|
// since we have called next() at least once)
|
||||||
|
pos.Line = s.srcPos.Line - 1
|
||||||
|
pos.Column = s.lastLineLen
|
||||||
|
default:
|
||||||
|
// at the beginning of the source
|
||||||
|
pos.Line = 1
|
||||||
|
pos.Column = 1
|
||||||
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// err prints the error of any scanning to s.Error function. If the function is
|
// err prints the error of any scanning to s.Error function. If the function is
|
||||||
// not defined, by default it prints them to os.Stderr
|
// not defined, by default it prints them to os.Stderr
|
||||||
func (s *Scanner) err(msg string) {
|
func (s *Scanner) err(msg string) {
|
||||||
s.ErrorCount++
|
s.ErrorCount++
|
||||||
|
pos := s.Pos()
|
||||||
|
|
||||||
if s.Error != nil {
|
if s.Error != nil {
|
||||||
s.Error(s.srcPos, msg)
|
s.Error(pos, msg)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Fprintf(os.Stderr, "%s: %s\n", s.srcPos, msg)
|
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
// isHexadecimal returns true if the given rune is a letter
|
// isHexadecimal returns true if the given rune is a letter
|
||||||
|
@ -195,8 +195,9 @@ func TestPosition(t *testing.T) {
|
|||||||
for _, listName := range orderedTokenLists {
|
for _, listName := range orderedTokenLists {
|
||||||
|
|
||||||
for _, k := range tokenLists[listName] {
|
for _, k := range tokenLists[listName] {
|
||||||
curPos := s.Pos()
|
curPos := s.Position
|
||||||
// fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column)
|
// fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column)
|
||||||
|
|
||||||
if curPos.Offset != pos.Offset {
|
if curPos.Offset != pos.Offset {
|
||||||
t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text)
|
t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text)
|
||||||
}
|
}
|
||||||
@ -246,8 +247,8 @@ func TestFloat(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestError(t *testing.T) {
|
func TestError(t *testing.T) {
|
||||||
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.EOF)
|
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
|
||||||
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.EOF)
|
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
|
||||||
|
|
||||||
testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
|
testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
|
||||||
testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)
|
testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)
|
||||||
@ -255,21 +256,16 @@ func TestError(t *testing.T) {
|
|||||||
testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
|
testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
|
||||||
testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
|
testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
|
||||||
|
|
||||||
testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
|
|
||||||
testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
|
|
||||||
|
|
||||||
testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER)
|
testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER)
|
||||||
testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER)
|
testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER)
|
||||||
testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER)
|
testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER)
|
||||||
testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER)
|
testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER)
|
||||||
testError(t, `'aa'`, "1:4", "illegal char literal", token.STRING)
|
testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL)
|
||||||
|
|
||||||
testError(t, `'`, "1:2", "literal not terminated", token.STRING)
|
testError(t, `"`, "1:2", "literal not terminated", token.STRING)
|
||||||
testError(t, `'`+"\n", "1:2", "literal not terminated", token.STRING)
|
|
||||||
testError(t, `"abc`, "1:5", "literal not terminated", token.STRING)
|
testError(t, `"abc`, "1:5", "literal not terminated", token.STRING)
|
||||||
testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING)
|
testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING)
|
||||||
testError(t, "`abc\n", "2:1", "literal not terminated", token.STRING)
|
testError(t, `/*/`, "1:4", "comment not terminated", token.COMMENT)
|
||||||
testError(t, `/*/`, "1:4", "comment not terminated", token.EOF)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func testError(t *testing.T, src, pos, msg string, tok token.Token) {
|
func testError(t *testing.T, src, pos, msg string, tok token.Token) {
|
||||||
|
Loading…
Reference in New Issue
Block a user