scanner: Fail if U+E123 is found in input.

This private-use Unicode code point (it is not assigned to any character)
is used internally by the printer package as a marker to fix up the
indentation of generated files. If this code point is present in the input,
the printer gets confused and removes more than it should, producing
unparsable output. The scanner therefore rejects it with an error.
This commit is contained in:
Florian Forster 2018-03-20 14:14:51 +01:00 committed by Florian Forster
parent a5efd34964
commit ec2ba18997
2 changed files with 6 additions and 0 deletions

View File

@@ -100,6 +100,11 @@ func (s *Scanner) next() rune {
return eof
}
if ch == '\uE123' {
s.err("unicode code point U+E123 reserved for internal use")
return utf8.RuneError
}
// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch

View File

@@ -509,6 +509,7 @@ func TestScan_crlf(t *testing.T) {
func TestError(t *testing.T) {
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\uE123", "1:1", "unicode code point U+E123 reserved for internal use", token.ILLEGAL)
testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)