diff --git a/scanner/scanner.go b/scanner/scanner.go index de5a54f..4580c89 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -81,13 +81,15 @@ func (s *Scanner) next() rune { s.lastCharLen = size s.srcPos.Offset += size - s.srcPos.Column += size + s.srcPos.Column++ if ch == '\n' { s.srcPos.Line++ s.srcPos.Column = 0 s.lastLineLen = s.srcPos.Column } + // debug + // fmt.Printf("ch: %q, off:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column) return ch } @@ -110,7 +112,7 @@ func (s *Scanner) peek() rune { // Scan scans the next token and returns the token. func (s *Scanner) Scan() (tok token.Token) { - ch := s.peek() + ch := s.next() // skip white space for isWhitespace(ch) { @@ -121,9 +123,9 @@ func (s *Scanner) Scan() (tok token.Token) { s.tokBuf.Reset() s.tokStart = s.srcPos.Offset - s.lastCharLen - // token position, initial next() is moving the offset by one, though we - // are interested with the starting point - s.tokPos.Offset = s.srcPos.Offset - 1 + // token position, initial next() is moving the offset by one(size of rune + // actually), though we are interested with the starting point + s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen if s.srcPos.Column > 0 { // common case: last character was not a '\n' diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 64ba1f4..af56d80 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -44,8 +44,9 @@ func TestPosition(t *testing.T) { // t.SkipNow() // create artifical source code buf := new(bytes.Buffer) - for _, list := range tokenLists { - for _, ident := range list { + + for _, listName := range orderedTokenLists { + for _, ident := range tokenLists[listName] { fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text) } } @@ -55,20 +56,21 @@ func TestPosition(t *testing.T) { t.Fatal(err) } - s.Scan() pos := Position{"", 4, 1, 5} - for _, list := range tokenLists { - for _, k := range list { + for _, listName := range orderedTokenLists { + s.Scan() + + for _, k := range tokenLists[listName] { curPos := s.Pos() - fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) + // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) if curPos.Offset != pos.Offset { - t.Errorf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) + t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) } if curPos.Line != pos.Line { - t.Errorf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) + t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) } if curPos.Column != pos.Column { - t.Errorf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) + t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) } pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline pos.Line += countNewlines(k.text) + 1 // each token is on a new line @@ -81,44 +83,53 @@ func TestPosition(t *testing.T) { } } -var tokenLists = map[string][]tokenPair{ - // "comment": []tokenPair{ - // {token.COMMENT, "//"}, - // {token.COMMENT, "////"}, - // {token.COMMENT, "// comment"}, - // {token.COMMENT, "// /* comment */"}, - // {token.COMMENT, "// // comment //"}, - // {token.COMMENT, "//" + f100}, - // {token.COMMENT, "#"}, - // {token.COMMENT, "##"}, - // {token.COMMENT, "# comment"}, - // {token.COMMENT, "# /* comment */"}, - // {token.COMMENT, "# # comment #"}, - // {token.COMMENT, "#" + f100}, - // {token.COMMENT, "/**/"}, - // {token.COMMENT, "/***/"}, - // {token.COMMENT, "/* comment */"}, - // {token.COMMENT, "/* // comment */"}, - // {token.COMMENT, "/* /* comment */"}, - // {token.COMMENT, "/*\n comment\n*/"}, - // {token.COMMENT, "/*" + f100 + "*/"}, - // }, - // "operator": []tokenPair{ - // {token.LBRACK, "["}, - // {token.LBRACE, "{"}, - // {token.COMMA, ","}, - // {token.PERIOD, "."}, - // {token.RBRACK, "]"}, - // {token.RBRACE, "}"}, - // {token.ASSIGN, "="}, - // {token.ADD, "+"}, - // {token.SUB, "-"}, - // }, - // "bool": []tokenPair{ - // {token.BOOL, "true"}, - // {token.BOOL, "false"}, - // }, +var orderedTokenLists = []string{ + // "comment", + // "operator", + // "bool", + // "ident", + // "string", + "number", + // "float", +} +var tokenLists = map[string][]tokenPair{ + "comment": []tokenPair{ + {token.COMMENT, "//"}, + {token.COMMENT, "////"}, + {token.COMMENT, "// comment"}, + {token.COMMENT, "// /* comment */"}, + {token.COMMENT, "// // comment //"}, + {token.COMMENT, "//" + f100}, + {token.COMMENT, "#"}, + {token.COMMENT, "##"}, + {token.COMMENT, "# comment"}, + {token.COMMENT, "# /* comment */"}, + {token.COMMENT, "# # comment #"}, + {token.COMMENT, "#" + f100}, + {token.COMMENT, "/**/"}, + {token.COMMENT, "/***/"}, + {token.COMMENT, "/* comment */"}, + {token.COMMENT, "/* // comment */"}, + {token.COMMENT, "/* /* comment */"}, + {token.COMMENT, "/*\n comment\n*/"}, + {token.COMMENT, "/*" + f100 + "*/"}, + }, + "operator": []tokenPair{ + {token.LBRACK, "["}, + {token.LBRACE, "{"}, + {token.COMMA, ","}, + {token.PERIOD, "."}, + {token.RBRACK, "]"}, + {token.RBRACE, "}"}, + {token.ASSIGN, "="}, + {token.ADD, "+"}, + {token.SUB, "-"}, + }, + "bool": []tokenPair{ + {token.BOOL, "true"}, + {token.BOOL, "false"}, + }, "ident": []tokenPair{ {token.IDENT, "a"}, {token.IDENT, "a0"}, @@ -129,106 +140,106 @@ var tokenLists = map[string][]tokenPair{ {token.IDENT, "_abc123"}, {token.IDENT, "abc123_"}, {token.IDENT, "_abc_123_"}, - // {token.IDENT, "_äöü"}, - // {token.IDENT, "_本"}, - // {token.IDENT, "äöü"}, - // {token.IDENT, "本"}, - // {token.IDENT, "a۰۱۸"}, - // {token.IDENT, "foo६४"}, - // {token.IDENT, "bar9876"}, + {token.IDENT, "_äöü"}, + {token.IDENT, "_本"}, + {token.IDENT, "äöü"}, + {token.IDENT, "本"}, + {token.IDENT, "a۰۱۸"}, + {token.IDENT, "foo६४"}, + {token.IDENT, "bar9876"}, + }, + "string": []tokenPair{ + {token.STRING, `" "`}, + {token.STRING, `"a"`}, + {token.STRING, `"本"`}, + {token.STRING, `"\a"`}, + {token.STRING, `"\b"`}, + {token.STRING, `"\f"`}, + {token.STRING, `"\n"`}, + {token.STRING, `"\r"`}, + {token.STRING, `"\t"`}, + {token.STRING, `"\v"`}, + {token.STRING, `"\""`}, + {token.STRING, `"\000"`}, + {token.STRING, `"\777"`}, + {token.STRING, `"\x00"`}, + {token.STRING, `"\xff"`}, + {token.STRING, `"\u0000"`}, + {token.STRING, `"\ufA16"`}, + {token.STRING, `"\U00000000"`}, + {token.STRING, `"\U0000ffAB"`}, + {token.STRING, `"` + f100 + `"`}, + }, + "number": []tokenPair{ + {token.NUMBER, "0"}, + {token.NUMBER, "1"}, + {token.NUMBER, "9"}, + {token.NUMBER, "42"}, + {token.NUMBER, "1234567890"}, + {token.NUMBER, "00"}, + {token.NUMBER, "01"}, + {token.NUMBER, "07"}, + {token.NUMBER, "042"}, + {token.NUMBER, "01234567"}, + {token.NUMBER, "0x0"}, + {token.NUMBER, "0x1"}, + {token.NUMBER, "0xf"}, + {token.NUMBER, "0x42"}, + {token.NUMBER, "0x123456789abcDEF"}, + {token.NUMBER, "0x" + f100}, + {token.NUMBER, "0X0"}, + {token.NUMBER, "0X1"}, + {token.NUMBER, "0XF"}, + {token.NUMBER, "0X42"}, + {token.NUMBER, "0X123456789abcDEF"}, + {token.NUMBER, "0X" + f100}, + {token.NUMBER, "0e0"}, + {token.NUMBER, "1e0"}, + {token.NUMBER, "42e0"}, + {token.NUMBER, "01234567890e0"}, + {token.NUMBER, "0E0"}, + {token.NUMBER, "1E0"}, + {token.NUMBER, "42E0"}, + {token.NUMBER, "01234567890E0"}, + {token.NUMBER, "0e+10"}, + {token.NUMBER, "1e-10"}, + {token.NUMBER, "42e+10"}, + {token.NUMBER, "01234567890e-10"}, + {token.NUMBER, "0E+10"}, + {token.NUMBER, "1E-10"}, + {token.NUMBER, "42E+10"}, + {token.NUMBER, "01234567890E-10"}, + }, + "float": []tokenPair{ + {token.FLOAT, "0."}, + {token.FLOAT, "1."}, + {token.FLOAT, "42."}, + {token.FLOAT, "01234567890."}, + {token.FLOAT, ".0"}, + {token.FLOAT, ".1"}, + {token.FLOAT, ".42"}, + {token.FLOAT, ".0123456789"}, + {token.FLOAT, "0.0"}, + {token.FLOAT, "1.0"}, + {token.FLOAT, "42.0"}, + {token.FLOAT, "01234567890.0"}, + {token.FLOAT, "01.8e0"}, + {token.FLOAT, "1.4e0"}, + {token.FLOAT, "42.2e0"}, + {token.FLOAT, "01234567890.12e0"}, + {token.FLOAT, "0.E0"}, + {token.FLOAT, "1.12E0"}, + {token.FLOAT, "42.123E0"}, + {token.FLOAT, "01234567890.213E0"}, + {token.FLOAT, "0.2e+10"}, + {token.FLOAT, "1.2e-10"}, + {token.FLOAT, "42.54e+10"}, + {token.FLOAT, "01234567890.98e-10"}, + {token.FLOAT, "0.1E+10"}, + {token.FLOAT, "1.1E-10"}, + {token.FLOAT, "42.1E+10"}, + {token.FLOAT, "01234567890.1E-10"}, }, - // "string": []tokenPair{ - // {token.STRING, `" "`}, - // {token.STRING, `"a"`}, - // {token.STRING, `"本"`}, - // {token.STRING, `"\a"`}, - // {token.STRING, `"\b"`}, - // {token.STRING, `"\f"`}, - // {token.STRING, `"\n"`}, - // {token.STRING, `"\r"`}, - // {token.STRING, `"\t"`}, - // {token.STRING, `"\v"`}, - // {token.STRING, `"\""`}, - // {token.STRING, `"\000"`}, - // {token.STRING, `"\777"`}, - // {token.STRING, `"\x00"`}, - // {token.STRING, `"\xff"`}, - // {token.STRING, `"\u0000"`}, - // {token.STRING, `"\ufA16"`}, - // {token.STRING, `"\U00000000"`}, - // {token.STRING, `"\U0000ffAB"`}, - // {token.STRING, `"` + f100 + `"`}, - // }, - // "number": []tokenPair{ - // {token.NUMBER, "0"}, - // {token.NUMBER, "1"}, - // {token.NUMBER, "9"}, - // {token.NUMBER, "42"}, - // {token.NUMBER, "1234567890"}, - // {token.NUMBER, "00"}, - // {token.NUMBER, "01"}, - // {token.NUMBER, "07"}, - // {token.NUMBER, "042"}, - // {token.NUMBER, "01234567"}, - // {token.NUMBER, "0x0"}, - // {token.NUMBER, "0x1"}, - // {token.NUMBER, "0xf"}, - // {token.NUMBER, "0x42"}, - // {token.NUMBER, "0x123456789abcDEF"}, - // {token.NUMBER, "0x" + f100}, - // {token.NUMBER, "0X0"}, - // {token.NUMBER, "0X1"}, - // {token.NUMBER, "0XF"}, - // {token.NUMBER, "0X42"}, - // {token.NUMBER, "0X123456789abcDEF"}, - // {token.NUMBER, "0X" + f100}, - // {token.NUMBER, "0e0"}, - // {token.NUMBER, "1e0"}, - // {token.NUMBER, "42e0"}, - // {token.NUMBER, "01234567890e0"}, - // {token.NUMBER, "0E0"}, - // {token.NUMBER, "1E0"}, - // {token.NUMBER, "42E0"}, - // {token.NUMBER, "01234567890E0"}, - // {token.NUMBER, "0e+10"}, - // {token.NUMBER, "1e-10"}, - // {token.NUMBER, "42e+10"}, - // {token.NUMBER, "01234567890e-10"}, - // {token.NUMBER, "0E+10"}, - // {token.NUMBER, "1E-10"}, - // {token.NUMBER, "42E+10"}, - // {token.NUMBER, "01234567890E-10"}, - // }, - // "float": []tokenPair{ - // {token.FLOAT, "0."}, - // {token.FLOAT, "1."}, - // {token.FLOAT, "42."}, - // {token.FLOAT, "01234567890."}, - // {token.FLOAT, ".0"}, - // {token.FLOAT, ".1"}, - // {token.FLOAT, ".42"}, - // {token.FLOAT, ".0123456789"}, - // {token.FLOAT, "0.0"}, - // {token.FLOAT, "1.0"}, - // {token.FLOAT, "42.0"}, - // {token.FLOAT, "01234567890.0"}, - // {token.FLOAT, "01.8e0"}, - // {token.FLOAT, "1.4e0"}, - // {token.FLOAT, "42.2e0"}, - // {token.FLOAT, "01234567890.12e0"}, - // {token.FLOAT, "0.E0"}, - // {token.FLOAT, "1.12E0"}, - // {token.FLOAT, "42.123E0"}, - // {token.FLOAT, "01234567890.213E0"}, - // {token.FLOAT, "0.2e+10"}, - // {token.FLOAT, "1.2e-10"}, - // {token.FLOAT, "42.54e+10"}, - // {token.FLOAT, "01234567890.98e-10"}, - // {token.FLOAT, "0.1E+10"}, - // {token.FLOAT, "1.1E-10"}, - // {token.FLOAT, "42.1E+10"}, - // {token.FLOAT, "01234567890.1E-10"}, - // }, } func TestComment(t *testing.T) {