From 4711a01f764fb7c54d351ed8a639e8e11e4a991c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 19:45:57 +0300 Subject: [PATCH] lexer: various changes, trying text/scanner --- parser/lexer.go | 62 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index 6048dfa..267c083 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -2,7 +2,9 @@ package parser import ( "bufio" + "bytes" "io" + "text/scanner" "unicode" ) @@ -11,7 +13,9 @@ const eof = rune(0) // Lexer defines a lexical scanner type Lexer struct { - r *bufio.Reader + src *bufio.Reader // input + ch rune // current character + sc *scanner.Scanner // Start position of most recently scanned token; set by Scan. // Calling Init or Next invalidates the position (Line == 0). @@ -23,38 +27,73 @@ type Lexer struct { } // NewLexer returns a new instance of Lexer. -func NewLexer(r io.Reader) *Lexer { +func NewLexer(src io.Reader) *Lexer { + sc := &scanner.Scanner{} + sc.Init(src) + sc.Mode = 0 + sc.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' + return &Lexer{ - r: bufio.NewReader(r), + src: bufio.NewReader(src), + sc: sc, } } // next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). func (l *Lexer) next() rune { - ch, _, err := l.r.ReadRune() + var err error + l.ch, _, err = l.src.ReadRune() if err != nil { return eof } - return ch + return l.ch } // unread places the previously read rune back on the reader. -func (l *Lexer) unread() { _ = l.r.UnreadRune() } +func (l *Lexer) unread() { + _ = l.src.UnreadRune() +} + +func (l *Lexer) peek() rune { + prev := l.ch + peekCh := l.next() + l.unread() + l.ch = prev + return peekCh +} // Scan scans the next token and returns the token and it's literal string. func (l *Lexer) Scan() (tok Token, lit string) { ch := l.next() - if isWhitespace(ch) { + // skip white space + for isWhitespace(ch) { ch = l.next() } + // identifier + if isLetter(ch) { + return l.scanIdentifier() + } + + switch ch { + case eof: + return EOF, "" + } + return 0, "" } -func (l *Lexer) skipWhitespace() { - l.next() +func (l *Lexer) scanIdentifier() (Token, string) { + // Create a buffer and read the current character into it. + var buf bytes.Buffer + + // write current character before we move to the next + buf.WriteRune(l.ch) + + return 0, "" + } // Pos returns the position of the character immediately after the character or @@ -73,11 +112,6 @@ func isEndOfLine(r rune) bool { return r == '\r' || r == '\n' } -// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. -func isAlphaNumeric(r rune) bool { - return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) -} - func isLetter(ch rune) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) }