diff --git a/scanner/scanner.go b/scanner/scanner.go index 4580c89..12d8bbc 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "os" "unicode" + "unicode/utf8" "github.com/fatih/hcl/token" ) @@ -76,6 +77,13 @@ func (s *Scanner) next() rune { return eof } + if ch == utf8.RuneError && size == 1 { + s.srcPos.Column++ + s.srcPos.Offset += size + s.err("illegal UTF-8 encoding") + return eof + } + // remember last position s.prevPos = s.srcPos s.lastCharLen = size @@ -430,6 +438,8 @@ func (s *Scanner) Pos() (pos Position) { return s.tokPos } +// err reports a scanning error to the s.Error function. If the function is +// not defined, it prints the error to os.Stderr by default. func (s *Scanner) err(msg string) { s.ErrorCount++ if s.Error != nil { @@ -440,22 +450,27 @@ func (s *Scanner) err(msg string) { fmt.Fprintf(os.Stderr, "%s: %s\n", s.srcPos, msg) } +// isLetter returns true if the given rune is a letter func isLetter(ch rune) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) } +// isDigit returns true if the given rune is a decimal digit func isDigit(ch rune) bool { return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) } +// isOctal returns true if the given rune is an octal digit func isOctal(ch rune) bool { return '0' <= ch && ch <= '7' } +// isDecimal returns true if the given rune is an ASCII decimal digit func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } +// isHexadecimal returns true if the given rune is a hexadecimal digit func isHexadecimal(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' } @@ -465,6 +480,7 @@ func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } +// digitVal returns the integer value of a given octal, decimal or hexadecimal rune func digitVal(ch rune) int { switch { case '0' <= ch && ch <= '9': diff --git 
a/scanner/scanner_test.go b/scanner/scanner_test.go index 07951b2..5f97679 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -3,6 +3,7 @@ package scanner import ( "bytes" "fmt" + "strings" "testing" "github.com/fatih/hcl/token" @@ -175,7 +176,6 @@ var orderedTokenLists = []string{ } func TestPosition(t *testing.T) { - // t.SkipNow() // create artifical source code buf := new(bytes.Buffer) @@ -245,6 +245,65 @@ func TestFloat(t *testing.T) { testTokenList(t, tokenLists["float"]) } +func TestError(t *testing.T) { + testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.EOF) + testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.EOF) + + testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT) + testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT) + + testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) + testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) + + testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) + testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) + + testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER) + testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER) + testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `'aa'`, "1:4", "illegal char literal", token.STRING) + + testError(t, `'`, "1:2", "literal not terminated", token.STRING) + testError(t, `'`+"\n", "1:2", "literal not terminated", token.STRING) + testError(t, `"abc`, "1:5", "literal not terminated", token.STRING) + testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING) + testError(t, "`abc\n", "2:1", "literal not terminated", token.STRING) + testError(t, `/*/`, "1:4", "comment not terminated", token.EOF) +} + +func testError(t *testing.T, src, pos, msg string, tok token.Token) { + s, err := 
NewScanner(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + + errorCalled := false + s.Error = func(p Position, m string) { + if !errorCalled { + if pos != p.String() { + t.Errorf("pos = %q, want %q for %q", p, pos, src) + } + + if m != msg { + t.Errorf("msg = %q, want %q for %q", m, msg, src) + } + errorCalled = true + } + } + + tk := s.Scan() + if tk != tok { + t.Errorf("tok = %s, want %s for %q", tk, tok, src) + } + if !errorCalled { + t.Errorf("error handler not called for %q", src) + } + if s.ErrorCount == 0 { + t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src) + } +} + func testTokenList(t *testing.T, tokenList []tokenPair) { // create artifical source code buf := new(bytes.Buffer)