Merge pull request #241 from octo/scanner-null

printer, scanner: Don't produce unparsable output.
This commit is contained in:
Mitchell Hashimoto 2018-03-20 13:19:40 -07:00 committed by GitHub
commit adef769457
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 2 deletions

View File

@ -147,3 +147,25 @@ func lineAt(text []byte, offs int) []byte {
}
return text[offs:i]
}
// TestFormatValidOutput ensures that the output of Format() can be parsed
// again.
//
// The inputs are a '#' followed by a NUL byte, and a '#' followed by the code
// point U+E123 and a 't'. An input that Format() rejects is logged and
// skipped; the test only fails when Format() succeeds but parser.Parse()
// rejects the formatted result.
func TestFormatValidOutput(t *testing.T) {
	cases := []string{
		"#\x00",
		"#\ue123t",
	}

	for _, input := range cases {
		formatted, err := Format([]byte(input))
		if err != nil {
			// ignore these failures, not all inputs are valid HCL.
			t.Logf("Format(%q) = %v", input, err)
			continue
		}

		// Format() accepted the input, so its output must round-trip
		// through the parser.
		if _, err := parser.Parse(formatted); err != nil {
			t.Errorf("Format(%q) = %q; Parse(%q) = %v", input, formatted, formatted, err)
		}
	}
}

View File

@ -95,12 +95,16 @@ func (s *Scanner) next() rune {
s.srcPos.Column = 0
}
// If we see a null character with data left, then that is an error
if ch == '\x00' && s.buf.Len() > 0 {
if ch == '\x00' {
s.err("unexpected null character (0x00)")
return eof
}
if ch == '\uE123' {
s.err("unicode code point U+E123 reserved for internal use")
return utf8.RuneError
}
// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch

View File

@ -509,9 +509,12 @@ func TestScan_crlf(t *testing.T) {
func TestError(t *testing.T) {
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\uE123", "1:1", "unicode code point U+E123 reserved for internal use", token.ILLEGAL)
testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)
testError(t, "ab\x00", "1:3", "unexpected null character (0x00)", token.IDENT)
testError(t, "ab\x00\n", "1:3", "unexpected null character (0x00)", token.IDENT)
testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)