hcl/parser/lexer.go

package parser

import (
	"bufio"
	"bytes"
	"io"
	"text/scanner"
	"unicode"
)

// eof represents a marker rune for the end of the reader.
const eof = rune(0)

// Lexer defines a lexical scanner
type Lexer struct {
	src *bufio.Reader // input
	ch  rune          // current character
	sc  *scanner.Scanner

	// Start position of most recently scanned token; set by Scan.
	// Calling Init or Next invalidates the position (Line == 0).
	// The Filename field is always left untouched by the Scanner.
	// If an error is reported (via Error) and Position is invalid,
	// the scanner is not inside a token. Call Pos to obtain an error
	// position in that case.
	Position
}

// NewLexer returns a new instance of Lexer.
func NewLexer(src io.Reader) *Lexer {
	sc := &scanner.Scanner{}
	sc.Init(src)
	sc.Mode = 0
	sc.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '

	return &Lexer{
		src: bufio.NewReader(src),
		sc:  sc,
	}
}

// next reads the next rune from the bufferred reader.  Returns the rune(0) if
// an error occurs (or io.EOF is returned).
func (l *Lexer) next() rune {
	var err error
	l.ch, _, err = l.src.ReadRune()
	if err != nil {
		return eof
	}
	return l.ch
}

// unread places the previously read rune back on the reader.
func (l *Lexer) unread() {
	_ = l.src.UnreadRune()
}

func (l *Lexer) peek() rune {
	prev := l.ch
	peekCh := l.next()
	l.unread()
	l.ch = prev
	return peekCh
}

// Scan scans the next token and returns the token and it's literal string.
func (l *Lexer) Scan() (tok Token, lit string) {
	ch := l.next()

	// skip white space
	for isWhitespace(ch) {
		ch = l.next()
	}

	// identifier
	if isLetter(ch) {
		return l.scanIdentifier()
	}

	switch ch {
	case eof:
		return EOF, ""
	}

	return 0, ""
}

func (l *Lexer) scanIdentifier() (Token, string) {
	// Create a buffer and read the current character into it.
	var buf bytes.Buffer

	// write current character before we move to the next
	buf.WriteRune(l.ch)

	return 0, ""

}

// Pos returns the position of the character immediately after the character or
// token returned by the last call to Next or Scan.
func (l *Lexer) Pos() Position {
	return Position{}
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}

// isEndOfLine reports whether r is an end-of-line character.
func isEndOfLine(r rune) bool {
	return r == '\r' || r == '\n'
}

func isLetter(ch rune) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}

func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
}

// isWhitespace returns true if the rune is a space, tab, newline or carriage return
func isWhitespace(ch rune) bool {
	return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
}
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`package parser`

			`import (`
			`"bufio"`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`"bytes"`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`"io"`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`"text/scanner"`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`"unicode"`
			`)`

			`// eof represents a marker rune for the end of the reader.`
			`const eof = rune(0)`

			`// Lexer defines a lexical scanner`
			`type Lexer struct {`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`src *bufio.Reader // input`
			`ch rune // current character`
			`sc *scanner.Scanner`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00
			`// Start position of most recently scanned token; set by Scan.`
			`// Calling Init or Next invalidates the position (Line == 0).`
			`// The Filename field is always left untouched by the Scanner.`
			`// If an error is reported (via Error) and Position is invalid,`
			`// the scanner is not inside a token. Call Pos to obtain an error`
			`// position in that case.`
			`Position`
			`}`

			`// NewLexer returns a new instance of Lexer.`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`func NewLexer(src io.Reader) *Lexer {`
			`sc := &scanner.Scanner{}`
			`sc.Init(src)`
			`sc.Mode = 0`
			`sc.Whitespace = 1<<'\t' \| 1<<'\n' \| 1<<'\r' \| 1<<' '`

parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`return &Lexer{`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`src: bufio.NewReader(src),`
			`sc: sc,`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`
			`}`

			`// next reads the next rune from the bufferred reader. Returns the rune(0) if`
			`// an error occurs (or io.EOF is returned).`
			`func (l *Lexer) next() rune {`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`var err error`
			`l.ch, _, err = l.src.ReadRune()`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`if err != nil {`
			`return eof`
			`}`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`return l.ch`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`

			`// unread places the previously read rune back on the reader.`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`func (l *Lexer) unread() {`
			`_ = l.src.UnreadRune()`
			`}`

			`func (l *Lexer) peek() rune {`
			`prev := l.ch`
			`peekCh := l.next()`
			`l.unread()`
			`l.ch = prev`
			`return peekCh`
			`}`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00
			`// Scan scans the next token and returns the token and it's literal string.`
			`func (l *Lexer) Scan() (tok Token, lit string) {`
			`ch := l.next()`

lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`// skip white space`
			`for isWhitespace(ch) {`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`ch = l.next()`
			`}`

lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`// identifier`
			`if isLetter(ch) {`
			`return l.scanIdentifier()`
			`}`

			`switch ch {`
			`case eof:`
			`return EOF, ""`
			`}`

parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`return 0, ""`
			`}`

lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`func (l *Lexer) scanIdentifier() (Token, string) {`
			`// Create a buffer and read the current character into it.`
			`var buf bytes.Buffer`

			`// write current character before we move to the next`
			`buf.WriteRune(l.ch)`

			`return 0, ""`

parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`

			`// Pos returns the position of the character immediately after the character or`
			`// token returned by the last call to Next or Scan.`
			`func (l *Lexer) Pos() Position {`
			`return Position{}`
			`}`

			`// isSpace reports whether r is a space character.`
			`func isSpace(r rune) bool {`
			`return r == ' ' \|\| r == '\t'`
			`}`

			`// isEndOfLine reports whether r is an end-of-line character.`
			`func isEndOfLine(r rune) bool {`
			`return r == '\r' \|\| r == '\n'`
			`}`

			`func isLetter(ch rune) bool {`
			`return 'a' <= ch && ch <= 'z' \|\| 'A' <= ch && ch <= 'Z' \|\| ch == '_' \|\| ch >= 0x80 && unicode.IsLetter(ch)`
			`}`

			`func isDigit(ch rune) bool {`
			`return '0' <= ch && ch <= '9' \|\| ch >= 0x80 && unicode.IsDigit(ch)`
			`}`

			`// isWhitespace returns true if the rune is a space, tab, newline or carriage return`
			`func isWhitespace(ch rune) bool {`
			`return ch == ' ' \|\| ch == '\t' \|\| ch == '\n' \|\| ch == '\r'`
			`}`