lexer: various changes, trying text/scanner
This commit is contained in:
parent
e6ba36eaca
commit
4711a01f76
@ -2,7 +2,9 @@ package parser
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"io"
|
"io"
|
||||||
|
"text/scanner"
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -11,7 +13,9 @@ const eof = rune(0)
|
|||||||
|
|
||||||
// Lexer defines a lexical scanner
|
// Lexer defines a lexical scanner
|
||||||
type Lexer struct {
|
type Lexer struct {
|
||||||
r *bufio.Reader
|
src *bufio.Reader // input
|
||||||
|
ch rune // current character
|
||||||
|
sc *scanner.Scanner
|
||||||
|
|
||||||
// Start position of most recently scanned token; set by Scan.
|
// Start position of most recently scanned token; set by Scan.
|
||||||
// Calling Init or Next invalidates the position (Line == 0).
|
// Calling Init or Next invalidates the position (Line == 0).
|
||||||
@ -23,38 +27,73 @@ type Lexer struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewLexer returns a new instance of Lexer.
|
// NewLexer returns a new instance of Lexer.
|
||||||
func NewLexer(r io.Reader) *Lexer {
|
func NewLexer(src io.Reader) *Lexer {
|
||||||
|
sc := &scanner.Scanner{}
|
||||||
|
sc.Init(src)
|
||||||
|
sc.Mode = 0
|
||||||
|
sc.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
|
||||||
|
|
||||||
return &Lexer{
|
return &Lexer{
|
||||||
r: bufio.NewReader(r),
|
src: bufio.NewReader(src),
|
||||||
|
sc: sc,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// next reads the next rune from the buffered reader. Returns the rune(0) if
|
// next reads the next rune from the buffered reader. Returns the rune(0) if
|
||||||
// an error occurs (or io.EOF is returned).
|
// an error occurs (or io.EOF is returned).
|
||||||
func (l *Lexer) next() rune {
|
func (l *Lexer) next() rune {
|
||||||
ch, _, err := l.r.ReadRune()
|
var err error
|
||||||
|
l.ch, _, err = l.src.ReadRune()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return eof
|
return eof
|
||||||
}
|
}
|
||||||
return ch
|
return l.ch
|
||||||
}
|
}
|
||||||
|
|
||||||
// unread places the previously read rune back on the reader.
|
// unread places the previously read rune back on the reader.
|
||||||
func (l *Lexer) unread() { _ = l.r.UnreadRune() }
|
func (l *Lexer) unread() {
|
||||||
|
_ = l.src.UnreadRune()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Lexer) peek() rune {
|
||||||
|
prev := l.ch
|
||||||
|
peekCh := l.next()
|
||||||
|
l.unread()
|
||||||
|
l.ch = prev
|
||||||
|
return peekCh
|
||||||
|
}
|
||||||
|
|
||||||
// Scan scans the next token and returns the token and its literal string.
|
// Scan scans the next token and returns the token and its literal string.
|
||||||
func (l *Lexer) Scan() (tok Token, lit string) {
|
func (l *Lexer) Scan() (tok Token, lit string) {
|
||||||
ch := l.next()
|
ch := l.next()
|
||||||
|
|
||||||
if isWhitespace(ch) {
|
// skip white space
|
||||||
|
for isWhitespace(ch) {
|
||||||
ch = l.next()
|
ch = l.next()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// identifier
|
||||||
|
if isLetter(ch) {
|
||||||
|
return l.scanIdentifier()
|
||||||
|
}
|
||||||
|
|
||||||
|
switch ch {
|
||||||
|
case eof:
|
||||||
|
return EOF, ""
|
||||||
|
}
|
||||||
|
|
||||||
return 0, ""
|
return 0, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *Lexer) skipWhitespace() {
|
func (l *Lexer) scanIdentifier() (Token, string) {
|
||||||
l.next()
|
// Create a buffer and read the current character into it.
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
// write current character before we move to the next
|
||||||
|
buf.WriteRune(l.ch)
|
||||||
|
|
||||||
|
return 0, ""
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pos returns the position of the character immediately after the character or
|
// Pos returns the position of the character immediately after the character or
|
||||||
@ -73,11 +112,6 @@ func isEndOfLine(r rune) bool {
|
|||||||
return r == '\r' || r == '\n'
|
return r == '\r' || r == '\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
|
|
||||||
func isAlphaNumeric(r rune) bool {
|
|
||||||
return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
|
|
||||||
}
|
|
||||||
|
|
||||||
func isLetter(ch rune) bool {
|
func isLetter(ch rune) bool {
|
||||||
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
|
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user