Merge pull request #241 from octo/scanner-null

printer, scanner: Don't produce unparsable output.
2018-03-20 13:19:40 -07:00 · 2018-03-20 13:19:40 -07:00 · adef769457
commit adef769457
parent b1738d9053 ec2ba18997
3 changed files with 31 additions and 2 deletions
--- a/hcl/printer/printer_test.go
+++ b/hcl/printer/printer_test.go
@@ -147,3 +147,25 @@ func lineAt(text []byte, offs int) []byte {
 	}
 	return text[offs:i]
 }
+
+// TestFormatValidOutput ensures that the output of Format() can be parsed again.
+func TestFormatValidOutput(t *testing.T) {
+	cases := []string{
+		"#\x00",
+		"#\ue123t",
+	}
+
+	for _, c := range cases {
+		f, err := Format([]byte(c))
+		if err != nil {
+			// ignore these failures, not all inputs are valid HCL.
+			t.Logf("Format(%q) = %v", c, err)
+			continue
+		}
+
+		if _, err := parser.Parse(f); err != nil {
+			t.Errorf("Format(%q) = %q; Parse(%q) = %v", c, f, f, err)
+			continue
+		}
+	}
+}
--- a/hcl/scanner/scanner.go
+++ b/hcl/scanner/scanner.go
@@ -95,12 +95,16 @@ func (s *Scanner) next() rune {
 		s.srcPos.Column = 0
 	}

-	// If we see a null character with data left, then that is an error
-	if ch == '\x00' && s.buf.Len() > 0 {
+	if ch == '\x00' {
 		s.err("unexpected null character (0x00)")
 		return eof
 	}

+	if ch == '\uE123' {
+		s.err("unicode code point U+E123 reserved for internal use")
+		return utf8.RuneError
+	}
+
 	// debug
 	// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
 	return ch
--- a/hcl/scanner/scanner_test.go
+++ b/hcl/scanner/scanner_test.go
@@ -509,9 +509,12 @@ func TestScan_crlf(t *testing.T) {
 func TestError(t *testing.T) {
 	testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
 	testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
+	testError(t, "\uE123", "1:1", "unicode code point U+E123 reserved for internal use", token.ILLEGAL)

 	testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
 	testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)
+	testError(t, "ab\x00", "1:3", "unexpected null character (0x00)", token.IDENT)
+	testError(t, "ab\x00\n", "1:3", "unexpected null character (0x00)", token.IDENT)

 	testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
 	testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)