From 97fb05dd4abd4af01b5d0a41d65d4db8130d6eda Mon Sep 17 00:00:00 2001
From: Fatih Arslan
Date: Sun, 4 Oct 2015 00:20:26 +0300
Subject: [PATCH] lexer: scan strings

---
Review note: scanString was amended after the original commit (explicit eof
check, backslash handling); the blob hashes on the index lines below still
refer to the original commit.

 parser/lexer.go      | 33 ++++++++++++++++++++++++++++-
 parser/lexer_test.go | 50 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/parser/lexer.go b/parser/lexer.go
index e14fb39..2677647 100644
--- a/parser/lexer.go
+++ b/parser/lexer.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"io"
 	"io/ioutil"
+	"log"
 	"unicode"
 )
 
@@ -77,8 +78,8 @@ func (s *Scanner) Scan() (tok Token, lit string) {
 
 	// identifier
 	if isLetter(ch) {
-		s.scanIdentifier()
 		tok = IDENT
+		s.scanIdentifier()
 	}
 
 	if isDigit(ch) {
@@ -88,6 +89,10 @@ func (s *Scanner) Scan() (tok Token, lit string) {
 	switch ch {
 	case eof:
 		tok = EOF
+	case '"':
+		tok = STRING
+		s.scanString()
+		s.next() // move forward so we finalize the string
 	}
 
 	s.tokEnd = s.pos.Offset - s.lastCharLen
@@ -95,6 +100,32 @@ func (s *Scanner) Scan() (tok Token, lit string) {
 	return tok, s.TokenLiteral()
 }
 
+// scanString consumes the remainder of a double-quoted string literal, up to
+// and including the closing quote. The opening '"' has already been consumed
+// by Scan. A newline or end of input before the closing quote is reported as
+// an unterminated literal and ends the scan.
+func (s *Scanner) scanString() {
+	escaped := false
+	for {
+		ch := s.next()
+		switch {
+		case ch == '\n' || ch < 0 || ch == eof:
+			// eof is compared explicitly because the sentinel is not
+			// guaranteed to be negative.
+			log.Println("[ERROR] literal not terminated")
+			return
+		case escaped:
+			// The rune after a backslash never closes the literal.
+			// TODO: validate the escape sequence itself (scanEscape).
+			escaped = false
+		case ch == '\\':
+			escaped = true
+		case ch == '"':
+			return
+		}
+	}
+}
+
 func (s *Scanner) scanIdentifier() {
 	for isLetter(s.ch) || isDigit(s.ch) {
 		s.next()
diff --git a/parser/lexer_test.go b/parser/lexer_test.go
index e48c117..e69dbb6 100644
--- a/parser/lexer_test.go
+++ b/parser/lexer_test.go
@@ -6,6 +6,8 @@ import (
 	"testing"
 )
 
+var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+
 type token struct {
 	tok  Token
 	text string
@@ -54,3 +56,51 @@ func TestIdent(t *testing.T) {
 
 	}
 }
+
+func TestString(t *testing.T) {
+	var identList = []token{
+		{STRING, `" "`},
+		{STRING, `"a"`},
+		{STRING, `"本"`},
+		// {STRING, `"\a"`},
+		// {STRING, `"\b"`},
+		// {STRING, `"\f"`},
+		// {STRING, `"\n"`},
+		// {STRING, `"\r"`},
+		// {STRING, `"\t"`},
+		// {STRING, `"\v"`},
+		{STRING, `"\""`},
+		// {STRING, `"\000"`},
+		// {STRING, `"\777"`},
+		// {STRING, `"\x00"`},
+		// {STRING, `"\xff"`},
+		// {STRING, `"\u0000"`},
+		// {STRING, `"\ufA16"`},
+		// {STRING, `"\U00000000"`},
+		// {STRING, `"\U0000ffAB"`},
+		// {STRING, `"` + f100 + `"`},
+	}
+
+	// create artificial source code
+	buf := new(bytes.Buffer)
+	for _, ident := range identList {
+		fmt.Fprintf(buf, " \t%s\n", ident.text)
+	}
+
+	l, err := NewLexer(buf)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for _, ident := range identList {
+		tok, lit := l.Scan()
+		if tok != ident.tok {
+			t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text)
+		}
+
+		if lit != ident.text {
+			t.Errorf("text = %s want %s", lit, ident.text)
+		}
+
+	}
+}