lexer: scan strings

This commit is contained in:
Fatih Arslan 2015-10-04 00:20:26 +03:00
parent cc7a91eeab
commit 97fb05dd4a
2 changed files with 76 additions and 1 deletion

View File

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"io" "io"
"io/ioutil" "io/ioutil"
"log"
"unicode" "unicode"
) )
@ -77,8 +78,8 @@ func (s *Scanner) Scan() (tok Token, lit string) {
// identifier // identifier
if isLetter(ch) { if isLetter(ch) {
s.scanIdentifier()
tok = IDENT tok = IDENT
s.scanIdentifier()
} }
if isDigit(ch) { if isDigit(ch) {
@ -88,6 +89,10 @@ func (s *Scanner) Scan() (tok Token, lit string) {
switch ch { switch ch {
case eof: case eof:
tok = EOF tok = EOF
case '"':
tok = STRING
s.scanString()
s.next() // move forward so we finalize the string
} }
s.tokEnd = s.pos.Offset - s.lastCharLen s.tokEnd = s.pos.Offset - s.lastCharLen
@ -95,6 +100,26 @@ func (s *Scanner) Scan() (tok Token, lit string) {
return tok, s.TokenLiteral() return tok, s.TokenLiteral()
} }
// scanString consumes the body of a double-quoted string literal up to and
// including the closing quote. The opening '"' must already have been consumed
// by the caller.
//
// A backslash escapes the character that follows it, so an escaped quote
// (\") does not terminate the literal. Escape sequences are only skipped
// here, not validated.
//
// If a newline or a negative character value (presumably eof — confirm
// against next) is reached before the closing quote, an error is logged and
// scanning stops at that point.
func (s *Scanner) scanString() {
	for {
		switch ch := s.next(); {
		case ch == '"':
			// closing quote consumed; the literal is complete
			return
		case ch == '\n' || ch < 0:
			log.Println("[ERROR] literal not terminated")
			return
		case ch == '\\':
			// Consume the escaped character so that it is never mistaken
			// for the closing quote. An escape that runs into a newline or
			// the end of input still leaves the literal unterminated.
			if esc := s.next(); esc == '\n' || esc < 0 {
				log.Println("[ERROR] literal not terminated")
				return
			}
		}
	}
}
func (s *Scanner) scanIdentifier() { func (s *Scanner) scanIdentifier() {
for isLetter(s.ch) || isDigit(s.ch) { for isLetter(s.ch) || isDigit(s.ch) {
s.next() s.next()

View File

@ -6,6 +6,8 @@ import (
"testing" "testing"
) )
// f100 is a long run of 'f' characters (presumably 100, going by the name —
// TODO confirm) intended as the body of a long string literal test case.
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
type token struct { type token struct {
tok Token tok Token
text string text string
@ -54,3 +56,51 @@ func TestIdent(t *testing.T) {
} }
} }
// TestString feeds a list of string literals through the lexer, one per line,
// and checks that each one comes back as a STRING token whose literal text
// matches the input exactly.
func TestString(t *testing.T) {
	cases := []token{
		{STRING, `" "`},
		{STRING, `"a"`},
		{STRING, `"本"`},
		// Escape sequences and the long literal are not enabled yet:
		// {STRING, `"\a"`},
		// {STRING, `"\b"`},
		// {STRING, `"\f"`},
		// {STRING, `"\n"`},
		// {STRING, `"\r"`},
		// {STRING, `"\t"`},
		// {STRING, `"\v"`},
		// {STRING, `"\""`},
		// {STRING, `"\000"`},
		// {STRING, `"\777"`},
		// {STRING, `"\x00"`},
		// {STRING, `"\xff"`},
		// {STRING, `"\u0000"`},
		// {STRING, `"\ufA16"`},
		// {STRING, `"\U00000000"`},
		// {STRING, `"\U0000ffAB"`},
		// {STRING, `"` + f100 + `"`},
	}

	// Build artificial source code: every literal sits on its own line,
	// preceded by some whitespace.
	var src bytes.Buffer
	for i := range cases {
		fmt.Fprintf(&src, " \t%s\n", cases[i].text)
	}

	lexer, err := NewLexer(&src)
	if err != nil {
		t.Fatal(err)
	}

	// Tokens must come back in the same order the literals were written.
	for _, want := range cases {
		gotTok, gotLit := lexer.Scan()
		if gotTok != want.tok {
			t.Errorf("tok = %s want %s for %s\n", gotTok, want.tok, want.text)
		}

		if gotLit != want.text {
			t.Errorf("text = %s want %s", gotLit, want.text)
		}
	}
}