scanner: add more tests for capturing errors

This commit is contained in:
Fatih Arslan 2015-10-05 18:48:26 +03:00
parent bc777d79f3
commit ea92162955
2 changed files with 76 additions and 1 deletions

View File

@ -7,6 +7,7 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"unicode" "unicode"
"unicode/utf8"
"github.com/fatih/hcl/token" "github.com/fatih/hcl/token"
) )
@ -76,6 +77,13 @@ func (s *Scanner) next() rune {
return eof return eof
} }
if ch == utf8.RuneError && size == 1 {
s.srcPos.Column++
s.srcPos.Offset += size
s.err("illegal UTF-8 encoding")
return eof
}
// remember last position // remember last position
s.prevPos = s.srcPos s.prevPos = s.srcPos
s.lastCharLen = size s.lastCharLen = size
@ -430,6 +438,8 @@ func (s *Scanner) Pos() (pos Position) {
return s.tokPos return s.tokPos
} }
// err increments s.ErrorCount and reports the scanning error through the
// s.Error function. If s.Error is not set, the error is printed to os.Stderr.
func (s *Scanner) err(msg string) { func (s *Scanner) err(msg string) {
s.ErrorCount++ s.ErrorCount++
if s.Error != nil { if s.Error != nil {
@ -440,22 +450,27 @@ func (s *Scanner) err(msg string) {
fmt.Fprintf(os.Stderr, "%s: %s\n", s.srcPos, msg) fmt.Fprintf(os.Stderr, "%s: %s\n", s.srcPos, msg)
} }
// isLetter returns true if the given rune is an ASCII letter, an underscore,
// or a non-ASCII rune (>= 0x80) for which unicode.IsLetter reports true.
func isLetter(ch rune) bool { func isLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
} }
// isDigit returns true if the given rune is an ASCII decimal digit or a
// non-ASCII rune (>= 0x80) for which unicode.IsDigit reports true.
func isDigit(ch rune) bool { func isDigit(ch rune) bool {
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
} }
// isOctal returns true if the given rune is an octal digit ('0' through '7').
func isOctal(ch rune) bool { func isOctal(ch rune) bool {
return '0' <= ch && ch <= '7' return '0' <= ch && ch <= '7'
} }
// isDecimal returns true if the given rune is an ASCII decimal digit
// ('0' through '9').
func isDecimal(ch rune) bool { func isDecimal(ch rune) bool {
return '0' <= ch && ch <= '9' return '0' <= ch && ch <= '9'
} }
// isHexadecimal returns true if the given rune is a hexadecimal digit
// ('0'-'9', 'a'-'f' or 'A'-'F').
func isHexadecimal(ch rune) bool { func isHexadecimal(ch rune) bool {
return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
} }
@ -465,6 +480,7 @@ func isWhitespace(ch rune) bool {
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
} }
// digitVal returns the integer value of a given octal, decimal or hexadecimal rune
func digitVal(ch rune) int { func digitVal(ch rune) int {
switch { switch {
case '0' <= ch && ch <= '9': case '0' <= ch && ch <= '9':

View File

@ -3,6 +3,7 @@ package scanner
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"strings"
"testing" "testing"
"github.com/fatih/hcl/token" "github.com/fatih/hcl/token"
@ -175,7 +176,6 @@ var orderedTokenLists = []string{
} }
func TestPosition(t *testing.T) { func TestPosition(t *testing.T) {
// t.SkipNow()
// create artificial source code // create artificial source code
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
@ -245,6 +245,65 @@ func TestFloat(t *testing.T) {
testTokenList(t, tokenLists["float"]) testTokenList(t, tokenLists["float"])
} }
func TestError(t *testing.T) {
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.EOF)
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.EOF)
testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)
testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER)
testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER)
testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER)
testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER)
testError(t, `'aa'`, "1:4", "illegal char literal", token.STRING)
testError(t, `'`, "1:2", "literal not terminated", token.STRING)
testError(t, `'`+"\n", "1:2", "literal not terminated", token.STRING)
testError(t, `"abc`, "1:5", "literal not terminated", token.STRING)
testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING)
testError(t, "`abc\n", "2:1", "literal not terminated", token.STRING)
testError(t, `/*/`, "1:4", "comment not terminated", token.EOF)
}
// testError scans src once and verifies how the scanner reports its first
// error.
//
// It installs an Error callback that compares only the first reported
// position and message against pos and msg; later reports are ignored. It
// then checks that the token returned by the single Scan call equals tok,
// that the callback was invoked at least once, and that ErrorCount is
// non-zero.
func testError(t *testing.T, src, pos, msg string, tok token.Token) {
	sc, err := NewScanner(strings.NewReader(src))
	if err != nil {
		t.Fatal(err)
	}

	var reported bool
	sc.Error = func(p Position, m string) {
		// Only the first error is checked.
		if reported {
			return
		}
		reported = true

		if p.String() != pos {
			t.Errorf("pos = %q, want %q for %q", p, pos, src)
		}
		if m != msg {
			t.Errorf("msg = %q, want %q for %q", m, msg, src)
		}
	}

	if got := sc.Scan(); got != tok {
		t.Errorf("tok = %s, want %s for %q", got, tok, src)
	}

	if !reported {
		t.Errorf("error handler not called for %q", src)
	}

	if sc.ErrorCount == 0 {
		t.Errorf("count = %d, want > 0 for %q", sc.ErrorCount, src)
	}
}
func testTokenList(t *testing.T, tokenList []tokenPair) { func testTokenList(t *testing.T, tokenList []tokenPair) {
// create artificial source code // create artificial source code
buf := new(bytes.Buffer) buf := new(bytes.Buffer)