hcl/parser/lexer.go
2015-10-04 01:32:45 +03:00

188 lines
3.9 KiB
Go

package parser
import (
"bytes"
"io"
"io/ioutil"
"log"
"unicode"
)
// eof is the sentinel rune handed back by the scanner once the underlying
// reader has no more input (or a read fails).
const eof rune = 0
// Scanner is a lexical scanner. It reads runes from an in-memory buffer and
// tracks the byte offsets needed to slice token text out of the source.
type Scanner struct {
	src *bytes.Buffer // read cursor over the source; runes are consumed from here
	srcBytes []byte // the source bytes; token text is sliced from this by offset
	// ch rune // current character
	lastCharLen int // length of last character in bytes
	currPos Position // current position
	prevPos Position // previous position
	tokBuf bytes.Buffer // token text buffer
	tokPos int // start offset of the current token text (srcBytes index); valid if >= 0
	tokEnd int // end offset of the current token text (srcBytes index), exclusive
}
// NewLexer creates a Scanner for the given input. Although src is an
// io.Reader, its entire content is read up front and kept in memory; any read
// error is returned and no Scanner is created.
func NewLexer(src io.Reader) (*Scanner, error) {
	data, err := ioutil.ReadAll(src)
	if err != nil {
		return nil, err
	}

	s := &Scanner{src: bytes.NewBuffer(data)}
	// Capture the bytes before any rune is consumed so offsets index the
	// whole input.
	s.srcBytes = s.src.Bytes()
	return s, nil
}
// next consumes one rune from the buffered source and advances the current
// position past it. It returns eof (rune(0)) when the read fails, including
// at io.EOF; in that case no position state is modified.
func (s *Scanner) next() rune {
	r, width, err := s.src.ReadRune()
	if err != nil {
		return eof
	}

	// Keep the position before this rune so a single unread can restore it.
	s.prevPos = s.currPos
	s.lastCharLen = width

	s.currPos.Offset += width
	if r == '\n' {
		// A newline starts a fresh line at column zero.
		s.currPos.Line++
		s.currPos.Column = 0
	} else {
		// Columns advance by the rune's width in bytes.
		s.currPos.Column += width
	}
	return r
}
// unread pushes the most recently read rune back onto the source and restores
// the position recorded before it was read. It panics if the buffer cannot
// unread, which indicates a misuse by the calling code.
func (s *Scanner) unread() {
	err := s.src.UnreadRune()
	if err != nil {
		// caller error: there was no preceding successful read to undo
		panic(err)
	}

	// step the position back to where it was before the last rune
	s.currPos = s.prevPos
}
// peek returns the next rune without consuming it, or eof if no input is left.
//
// The rune is decoded from a temporary reader over the unread bytes, so the
// source buffer and the scanner's position are left untouched. Reading and
// unreading on the source buffer itself would clear its record of the last
// read, and a subsequent unread() would then panic.
func (s *Scanner) peek() rune {
	ch, _, err := bytes.NewReader(s.src.Bytes()).ReadRune()
	if err != nil {
		return eof
	}
	return ch
}
// Scan scans the next token and returns the token and its literal string.
//
// Leading whitespace is skipped. A rune accepted by isLetter starts an
// identifier, reported as IDENT, or as BOOL when its text is "true" or
// "false". A double quote starts a STRING. End of input yields EOF with an
// empty literal. Number scanning is not implemented yet, so a digit (like any
// other unrecognized rune) leaves tok at its zero value.
func (s *Scanner) Scan() (tok Token, lit string) {
	ch := s.next()

	// skip white space
	for isWhitespace(ch) {
		ch = s.next()
	}

	// The token starts at the rune just read, i.e. one rune before the
	// current offset.
	s.tokBuf.Reset()
	s.tokPos = s.currPos.Offset - s.lastCharLen

	switch {
	case isLetter(ch):
		tok = IDENT
		if id := s.scanIdentifier(); id == "true" || id == "false" {
			tok = BOOL
		}
	case isDigit(ch):
		// TODO(arslan): scan number literals
	case ch == eof:
		tok = EOF
		// next leaves lastCharLen untouched when it fails, so the start
		// computed above would point at the previously read rune. EOF has
		// no text: collapse the span to empty.
		s.tokPos = s.currPos.Offset
	case ch == '"':
		tok = STRING
		s.scanString()
	}

	s.tokEnd = s.currPos.Offset
	return tok, s.TokenLiteral()
}
// scanString consumes the body of a string literal whose opening '"' has
// already been read by the caller. It stops after the closing quote.
//
// A newline or the end of input before the closing quote is logged as an
// unterminated literal. Escape sequences are not handled yet: scanning stops
// right after the first backslash.
func (s *Scanner) scanString() {
	for {
		switch ch := s.next(); ch {
		case '"':
			// closing quote consumed: the literal is complete
			return
		case '\n', eof:
			// next reports end of input as eof (rune(0)), never as a
			// negative value; without this check an unterminated string
			// at end of input would loop forever.
			log.Println("[ERROR] literal not terminated")
			return
		case '\\':
			// TODO: scanEscape is not implemented; give up at the backslash
			return
		}
	}
}
// scanIdentifier consumes the rest of an identifier whose first rune has
// already been read by the caller, and returns the identifier's full text.
// The first rune that is neither a letter nor a digit is pushed back so it
// can start the next token.
func (s *Scanner) scanIdentifier() string {
	// The caller consumed the first rune, so the identifier begins one rune
	// before the current offset.
	start := s.currPos.Offset - s.lastCharLen

	for {
		before := s.currPos.Offset
		ch := s.next()
		if s.currPos.Offset == before {
			// next consumed nothing, so the input is exhausted. There is
			// no rune to push back, and calling unread here would panic
			// because the buffer has no successful read to undo.
			break
		}
		if !isLetter(ch) && !isDigit(ch) {
			s.unread() // we got the identifier, put back the latest char
			break
		}
	}

	return string(s.srcBytes[start:s.currPos.Offset])
}
// TokenLiteral returns the literal text of the most recently scanned token,
// or the empty string when no token text is available (tokPos < 0).
func (s *Scanner) TokenLiteral() string {
	if s.tokPos >= 0 {
		// Append the not-yet-saved part of the token text to tokBuf, then
		// move tokPos to the end so a repeated call appends nothing more
		// and returns the same text.
		s.tokBuf.Write(s.srcBytes[s.tokPos:s.tokEnd])
		s.tokPos = s.tokEnd
		return s.tokBuf.String()
	}

	// no token text
	return ""
}
// Pos reports the scanner's current position, i.e. the position just past
// the last rune it has consumed.
func (s *Scanner) Pos() Position {
	pos := s.currPos
	return pos
}
// isLetter reports whether ch may appear in an identifier as a letter: an
// ASCII letter, an underscore, or a non-ASCII rune that Unicode classifies as
// a letter.
func isLetter(ch rune) bool {
	switch {
	case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z', ch == '_':
		return true
	case ch < 0x80:
		// every other ASCII (or negative) value is not a letter
		return false
	}
	return unicode.IsLetter(ch)
}
// isDigit reports whether ch is a decimal digit: ASCII '0' through '9', or a
// non-ASCII rune that Unicode classifies as a digit.
func isDigit(ch rune) bool {
	if ch < 0x80 {
		// ASCII range: only '0'..'9' qualify
		return ch >= '0' && ch <= '9'
	}
	return unicode.IsDigit(ch)
}
// isWhitespace reports whether ch is one of the four whitespace runes the
// scanner skips: space, tab, newline or carriage return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	default:
		return false
	}
}