From e6ba36eacaabc5971d409f40aecb51561647a89b Mon Sep 17 00:00:00 2001
From: Fatih Arslan <ftharsln@gmail.com>
Date: Sat, 3 Oct 2015 17:08:09 +0300
Subject: [PATCH] parser: initial lexer next method

---
 parser/lexer.go | 92 +++++++++++++++++++++++++++++++++++++++++++++++++
 parser/token.go |  2 --
 2 files changed, 92 insertions(+), 2 deletions(-)
 create mode 100644 parser/lexer.go

diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..6048dfa
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,92 @@
+package parser
+
+import (
+	"bufio"
+	"io"
+	"unicode"
+)
+
+// eof (rune 0) marks the end of the reader; next returns it on any read error, so a NUL in the input looks identical.
+const eof = rune(0)
+
+// Lexer is a lexical scanner that reads runes from a buffered reader.
+type Lexer struct {
+	r *bufio.Reader // buffered source; next and unread operate on it
+
+	// Position is documented as the start position of the most recently
+	// scanned token, set by Scan.
+	// NOTE(review): the wording appears carried over from text/scanner;
+	// no Init, exported Next or Error is visible in this file, and Scan
+	// does not assign Position here. Confirm the intended contract
+	// before relying on it.
+	Position
+}
+
+// NewLexer returns a Lexer that reads from r through a buffered reader.
+func NewLexer(r io.Reader) *Lexer {
+	lex := new(Lexer)
+	lex.r = bufio.NewReader(r)
+	return lex
+}
+
+// next reads the next rune from the buffered reader. It returns eof
+// (rune(0)) if the read fails for any reason, io.EOF included.
+func (l *Lexer) next() rune {
+	if r, _, err := l.r.ReadRune(); err == nil {
+		return r
+	}
+	// Every read error collapses into the eof marker.
+	return eof
+}
+
+// unread puts the most recently read rune back on the reader; an UnreadRune error is deliberately discarded.
+func (l *Lexer) unread() { _ = l.r.UnreadRune() }
+
+// Scan scans the next token and returns the token and its literal string.
+func (l *Lexer) Scan() (tok Token, lit string) {
+	ch := l.next()
+	// Skip the whole run of leading whitespace, not just a single rune.
+	for isWhitespace(ch) {
+		ch = l.next()
+	}
+	// Token recognition is not implemented yet: always returns 0 and "".
+	return 0, ""
+}
+
+func (l *Lexer) skipWhitespace() {
+	l.next() // NOTE(review): consumes exactly one rune, whitespace or not; looks like a placeholder, confirm
+}
+
+// Pos is meant to report the position immediately after the character or token
+// returned by the last scan. NOTE(review): it currently always returns the zero Position.
+func (l *Lexer) Pos() Position {
+	return Position{}
+}
+
+// isSpace reports whether r is a space or a horizontal tab; line breaks do not count.
+func isSpace(r rune) bool {
+	return r == ' ' || r == '\t'
+}
+
+// isEndOfLine reports whether r is a carriage return ('\r') or a line feed ('\n').
+func isEndOfLine(r rune) bool {
+	return r == '\r' || r == '\n'
+}
+
+// isAlphaNumeric reports whether r is an underscore or a Unicode letter or digit (per the unicode package).
+func isAlphaNumeric(r rune) bool {
+	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
+}
+
+func isLetter(ch rune) bool { // true for ASCII letters, '_' and, for ch >= 0x80, Unicode letters
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
+}
+
+func isDigit(ch rune) bool { // true for ASCII digits and, for ch >= 0x80, Unicode decimal digits
+	return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
+}
+
+// isWhitespace reports whether ch is a space, tab, newline or carriage return.
+func isWhitespace(ch rune) bool {
+	return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
+}
diff --git a/parser/token.go b/parser/token.go
index 3b6d615..8dda237 100644
--- a/parser/token.go
+++ b/parser/token.go
@@ -10,7 +10,6 @@ const (
 	ILLEGAL Token = iota
 	EOF
 	COMMENT
-	NEWLINE
 
 	literal_beg
 	IDENT  // literals
@@ -43,7 +42,6 @@ var tokens = [...]string{
 
 	EOF:     "EOF",
 	COMMENT: "COMMENT",
-	NEWLINE: "NEWLINE",
 
 	IDENT:  "IDENT",
 	NUMBER: "NUMBER",