Merge pull request #240 from octo/scanner-next

scanner: Update prevPos even when returning utf8.RuneError.
This commit is contained in:
Mitchell Hashimoto 2018-03-20 13:20:55 -07:00 committed by GitHub
commit f40e974e75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 8 deletions

View File

@ -74,14 +74,6 @@ func (s *Scanner) next() rune {
return eof return eof
} }
if ch == utf8.RuneError && size == 1 {
s.srcPos.Column++
s.srcPos.Offset += size
s.lastCharLen = size
s.err("illegal UTF-8 encoding")
return ch
}
// remember last position // remember last position
s.prevPos = s.srcPos s.prevPos = s.srcPos
@ -89,6 +81,11 @@ func (s *Scanner) next() rune {
s.lastCharLen = size s.lastCharLen = size
s.srcPos.Offset += size s.srcPos.Offset += size
if ch == utf8.RuneError && size == 1 {
s.err("illegal UTF-8 encoding")
return ch
}
if ch == '\n' { if ch == '\n' {
s.srcPos.Line++ s.srcPos.Line++
s.lastLineLen = s.srcPos.Column s.lastLineLen = s.srcPos.Column

View File

@ -592,3 +592,22 @@ func countNewlines(s string) int {
} }
return n return n
} }
// TestScanHeredocRegexpCompile scans inputs that place an invalid UTF-8 byte
// (\xe1 followed directly by a newline) ahead of a heredoc and drains the
// scanner until it reports EOF. The test makes no assertions on the tokens
// themselves; it passes as long as scanning terminates without panicking.
func TestScanHeredocRegexpCompile(t *testing.T) {
	cases := []string{
		"0\xe1\n<<ȸ\nhello\nworld\nȸ",
	}

	for _, c := range cases {
		// Log through t rather than stdout so the output is attributed to
		// this test and is only shown with -v or when the test fails.
		t.Logf("START %q", c)

		s := New([]byte(c))
		for {
			tok := s.Scan()
			if tok.Type == token.EOF {
				break
			}
			t.Logf("s.Scan() = %s", tok)
		}
	}
}