From 4711a01f764fb7c54d351ed8a639e8e11e4a991c Mon Sep 17 00:00:00 2001
From: Fatih Arslan <ftharsln@gmail.com>
Date: Sat, 3 Oct 2015 19:45:57 +0300
Subject: [PATCH] lexer: various changes, trying text/scanner

---
 parser/lexer.go | 62 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/parser/lexer.go b/parser/lexer.go
index 6048dfa..267c083 100644
--- a/parser/lexer.go
+++ b/parser/lexer.go
@@ -2,7 +2,9 @@ package parser
 
 import (
 	"bufio"
+	"bytes"
 	"io"
+	"text/scanner"
 	"unicode"
 )
 
@@ -11,7 +13,9 @@ const eof = rune(0)
 
 // Lexer defines a lexical scanner
 type Lexer struct {
-	r *bufio.Reader
+	src *bufio.Reader // input
+	ch  rune          // current character
+	sc  *scanner.Scanner
 
 	// Start position of most recently scanned token; set by Scan.
 	// Calling Init or Next invalidates the position (Line == 0).
@@ -23,38 +27,73 @@ type Lexer struct {
 }
 
 // NewLexer returns a new instance of Lexer.
-func NewLexer(r io.Reader) *Lexer {
+func NewLexer(src io.Reader) *Lexer {
+	sc := &scanner.Scanner{}
+	sc.Init(src)
+	sc.Mode = 0
+	sc.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
+
 	return &Lexer{
-		r: bufio.NewReader(r),
+		src: bufio.NewReader(src),
+		sc:  sc,
 	}
 }
 
 // next reads the next rune from the bufferred reader.  Returns the rune(0) if
 // an error occurs (or io.EOF is returned).
 func (l *Lexer) next() rune {
-	ch, _, err := l.r.ReadRune()
+	var err error
+	l.ch, _, err = l.src.ReadRune()
 	if err != nil {
 		return eof
 	}
-	return ch
+	return l.ch
 }
 
 // unread places the previously read rune back on the reader.
-func (l *Lexer) unread() { _ = l.r.UnreadRune() }
+func (l *Lexer) unread() {
+	_ = l.src.UnreadRune()
+}
+
+func (l *Lexer) peek() rune {
+	prev := l.ch
+	peekCh := l.next()
+	l.unread()
+	l.ch = prev
+	return peekCh
+}
 
 // Scan scans the next token and returns the token and it's literal string.
 func (l *Lexer) Scan() (tok Token, lit string) {
 	ch := l.next()
 
-	if isWhitespace(ch) {
+	// skip white space
+	for isWhitespace(ch) {
 		ch = l.next()
 	}
 
+	// identifier
+	if isLetter(ch) {
+		return l.scanIdentifier()
+	}
+
+	switch ch {
+	case eof:
+		return EOF, ""
+	}
+
 	return 0, ""
 }
 
-func (l *Lexer) skipWhitespace() {
-	l.next()
+func (l *Lexer) scanIdentifier() (Token, string) {
+	// Create a buffer and read the current character into it.
+	var buf bytes.Buffer
+
+	// write current character before we move to the next
+	buf.WriteRune(l.ch)
+
+	return 0, ""
+
 }
 
 // Pos returns the position of the character immediately after the character or
@@ -73,11 +112,6 @@ func isEndOfLine(r rune) bool {
 	return r == '\r' || r == '\n'
 }
 
-// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
-func isAlphaNumeric(r rune) bool {
-	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
-}
-
 func isLetter(ch rune) bool {
 	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
 }