From e6ba36eacaabc5971d409f40aecb51561647a89b Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 17:08:09 +0300 Subject: [PATCH] parser: initial lexer next method --- parser/lexer.go | 92 +++++++++++++++++++++++++++++++++++++++++++++++++ parser/token.go | 2 -- 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 parser/lexer.go diff --git a/parser/lexer.go b/parser/lexer.go new file mode 100644 index 0000000..6048dfa --- /dev/null +++ b/parser/lexer.go @@ -0,0 +1,92 @@ +package parser + +import ( + "bufio" + "io" + "unicode" +) + +// eof represents a marker rune for the end of the reader. +const eof = rune(0) + +// Lexer defines a lexical scanner +type Lexer struct { + r *bufio.Reader + + // Start position of most recently scanned token; set by Scan. + // Calling Init or Next invalidates the position (Line == 0). + // The Filename field is always left untouched by the Scanner. + // If an error is reported (via Error) and Position is invalid, + // the scanner is not inside a token. Call Pos to obtain an error + // position in that case. + Position +} + +// NewLexer returns a new instance of Lexer. +func NewLexer(r io.Reader) *Lexer { + return &Lexer{ + r: bufio.NewReader(r), + } +} + +// next reads the next rune from the bufferred reader. Returns the rune(0) if +// an error occurs (or io.EOF is returned). +func (l *Lexer) next() rune { + ch, _, err := l.r.ReadRune() + if err != nil { + return eof + } + return ch +} + +// unread places the previously read rune back on the reader. +func (l *Lexer) unread() { _ = l.r.UnreadRune() } + +// Scan scans the next token and returns the token and it's literal string. +func (l *Lexer) Scan() (tok Token, lit string) { + ch := l.next() + + if isWhitespace(ch) { + ch = l.next() + } + + return 0, "" +} + +func (l *Lexer) skipWhitespace() { + l.next() +} + +// Pos returns the position of the character immediately after the character or +// token returned by the last call to Next or Scan. +func (l *Lexer) Pos() Position { + return Position{} +} + +// isSpace reports whether r is a space character. +func isSpace(r rune) bool { + return r == ' ' || r == '\t' +} + +// isEndOfLine reports whether r is an end-of-line character. +func isEndOfLine(r rune) bool { + return r == '\r' || r == '\n' +} + +// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. +func isAlphaNumeric(r rune) bool { + return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) +} + +func isLetter(ch rune) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) +} + +func isDigit(ch rune) bool { + return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) +} + +// isWhitespace returns true if the rune is a space, tab, newline or carriage return +func isWhitespace(ch rune) bool { + return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' +} diff --git a/parser/token.go b/parser/token.go index 3b6d615..8dda237 100644 --- a/parser/token.go +++ b/parser/token.go @@ -10,7 +10,6 @@ const ( ILLEGAL Token = iota EOF COMMENT - NEWLINE literal_beg IDENT // literals @@ -43,7 +42,6 @@ var tokens = [...]string{ EOF: "EOF", COMMENT: "COMMENT", - NEWLINE: "NEWLINE", IDENT: "IDENT", NUMBER: "NUMBER",