2015-10-04 17:16:43 +00:00
|
|
|
package scanner
|
2015-10-03 14:08:09 +00:00
|
|
|
|
|
|
|
import (
|
2015-10-03 16:45:57 +00:00
|
|
|
"bytes"
|
2015-10-04 19:01:10 +00:00
|
|
|
"fmt"
|
2015-10-03 14:08:09 +00:00
|
|
|
"io"
|
2015-10-03 20:50:50 +00:00
|
|
|
"io/ioutil"
|
2015-10-04 19:01:10 +00:00
|
|
|
"os"
|
2015-10-03 14:08:09 +00:00
|
|
|
"unicode"
|
2015-10-04 17:19:39 +00:00
|
|
|
|
|
|
|
"github.com/fatih/hcl/token"
|
2015-10-03 14:08:09 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// eof is the sentinel rune handed back once nothing more can be read from
// the source.
const eof rune = 0
|
|
|
|
|
2015-10-04 17:22:37 +00:00
|
|
|
// Scanner defines a lexical scanner
|
2015-10-03 18:25:21 +00:00
|
|
|
type Scanner struct {
|
2015-10-03 20:50:50 +00:00
|
|
|
src *bytes.Buffer
|
|
|
|
srcBytes []byte
|
|
|
|
|
2015-10-03 22:29:13 +00:00
|
|
|
lastCharLen int // length of last character in bytes
|
|
|
|
|
|
|
|
currPos Position // current position
|
|
|
|
prevPos Position // previous position
|
|
|
|
|
|
|
|
tokBuf bytes.Buffer // token text buffer
|
|
|
|
tokPos int // token text tail position (srcBuf index); valid if >= 0
|
|
|
|
tokEnd int // token text tail end (srcBuf index)
|
2015-10-04 19:01:10 +00:00
|
|
|
|
|
|
|
// Error is called for each error encountered. If no Error
|
|
|
|
// function is set, the error is reported to os.Stderr.
|
|
|
|
Error func(pos Position, msg string)
|
|
|
|
|
|
|
|
// ErrorCount is incremented by one for each error encountered.
|
|
|
|
ErrorCount int
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-04 17:22:37 +00:00
|
|
|
// NewScanner returns a new instance of Lexer. Even though src is an io.Reader,
|
2015-10-03 20:50:50 +00:00
|
|
|
// we fully consume the content.
|
2015-10-04 17:22:37 +00:00
|
|
|
func NewScanner(src io.Reader) (*Scanner, error) {
|
2015-10-03 20:50:50 +00:00
|
|
|
buf, err := ioutil.ReadAll(src)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
2015-10-03 20:50:50 +00:00
|
|
|
|
|
|
|
b := bytes.NewBuffer(buf)
|
|
|
|
return &Scanner{
|
|
|
|
src: b,
|
|
|
|
srcBytes: b.Bytes(),
|
|
|
|
}, nil
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 17:32:27 +00:00
|
|
|
// next reads the next rune from the bufferred reader. Returns the rune(0) if
|
2015-10-03 14:08:09 +00:00
|
|
|
// an error occurs (or io.EOF is returned).
|
2015-10-03 18:25:21 +00:00
|
|
|
func (s *Scanner) next() rune {
|
2015-10-03 22:29:13 +00:00
|
|
|
ch, size, err := s.src.ReadRune()
|
2015-10-03 14:08:09 +00:00
|
|
|
if err != nil {
|
|
|
|
return eof
|
|
|
|
}
|
2015-10-03 17:32:27 +00:00
|
|
|
|
2015-10-03 22:29:13 +00:00
|
|
|
// remember last position
|
|
|
|
s.prevPos = s.currPos
|
|
|
|
|
2015-10-03 20:50:50 +00:00
|
|
|
s.lastCharLen = size
|
2015-10-03 22:29:13 +00:00
|
|
|
s.currPos.Offset += size
|
|
|
|
s.currPos.Column += size
|
|
|
|
|
|
|
|
if ch == '\n' {
|
|
|
|
s.currPos.Line++
|
|
|
|
s.currPos.Column = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scanner) unread() {
|
|
|
|
if err := s.src.UnreadRune(); err != nil {
|
|
|
|
panic(err) // this is user fault, we should catch it
|
|
|
|
}
|
|
|
|
s.currPos = s.prevPos // put back last position
|
|
|
|
}
|
2015-10-03 14:08:09 +00:00
|
|
|
|
2015-10-03 22:29:13 +00:00
|
|
|
func (s *Scanner) peek() rune {
|
|
|
|
peek, _, err := s.src.ReadRune()
|
|
|
|
if err != nil {
|
|
|
|
return eof
|
2015-10-03 20:50:50 +00:00
|
|
|
}
|
2015-10-03 14:08:09 +00:00
|
|
|
|
2015-10-03 22:29:13 +00:00
|
|
|
s.src.UnreadRune()
|
|
|
|
return peek
|
2015-10-03 17:33:51 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 22:35:29 +00:00
|
|
|
// Scan scans the next token and returns the token.
|
2015-10-04 17:19:39 +00:00
|
|
|
func (s *Scanner) Scan() (tok token.Token) {
|
2015-10-03 18:25:21 +00:00
|
|
|
ch := s.next()
|
2015-10-03 14:08:09 +00:00
|
|
|
|
2015-10-03 16:45:57 +00:00
|
|
|
// skip white space
|
|
|
|
for isWhitespace(ch) {
|
2015-10-03 18:25:21 +00:00
|
|
|
ch = s.next()
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 20:50:50 +00:00
|
|
|
// start the token position
|
|
|
|
s.tokBuf.Reset()
|
2015-10-03 22:29:13 +00:00
|
|
|
s.tokPos = s.currPos.Offset - s.lastCharLen
|
2015-10-03 20:50:50 +00:00
|
|
|
|
2015-10-04 19:53:20 +00:00
|
|
|
switch {
|
|
|
|
case isLetter(ch):
|
2015-10-04 17:19:39 +00:00
|
|
|
tok = token.IDENT
|
2015-10-03 22:35:29 +00:00
|
|
|
lit := s.scanIdentifier()
|
2015-10-03 22:29:13 +00:00
|
|
|
if lit == "true" || lit == "false" {
|
2015-10-04 17:19:39 +00:00
|
|
|
tok = token.BOOL
|
2015-10-03 22:29:13 +00:00
|
|
|
}
|
2015-10-03 20:50:50 +00:00
|
|
|
|
2015-10-04 20:21:34 +00:00
|
|
|
case isDecimal(ch):
|
|
|
|
tok = s.scanNumber(ch)
|
2015-10-04 19:53:20 +00:00
|
|
|
default:
|
|
|
|
switch ch {
|
|
|
|
case eof:
|
|
|
|
tok = token.EOF
|
|
|
|
case '"':
|
|
|
|
tok = token.STRING
|
|
|
|
s.scanString()
|
|
|
|
}
|
2015-10-03 16:45:57 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 22:29:13 +00:00
|
|
|
s.tokEnd = s.currPos.Offset
|
2015-10-03 22:35:29 +00:00
|
|
|
return tok
|
2015-10-03 20:50:50 +00:00
|
|
|
}
|
2015-10-03 16:45:57 +00:00
|
|
|
|
2015-10-04 20:21:34 +00:00
|
|
|
// scanNumber scans a HCL number definition starting with the given rune
|
|
|
|
func (s *Scanner) scanNumber(ch rune) token.Token {
|
|
|
|
if ch == '0' {
|
|
|
|
// check hexadecimal or float
|
|
|
|
// ch = s.next()
|
|
|
|
// return token.ILLEGAL
|
|
|
|
}
|
|
|
|
|
|
|
|
s.scanMantissa(ch)
|
|
|
|
return token.NUMBER
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scanner) scanMantissa(ch rune) {
|
|
|
|
for isDecimal(ch) {
|
|
|
|
ch = s.next()
|
|
|
|
}
|
|
|
|
s.unread()
|
2015-10-04 19:17:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// scanString scans a quoted string
|
2015-10-03 21:20:26 +00:00
|
|
|
func (s *Scanner) scanString() {
|
2015-10-04 19:01:10 +00:00
|
|
|
for {
|
|
|
|
// '"' opening already consumed
|
|
|
|
// read character after quote
|
|
|
|
ch := s.next()
|
|
|
|
|
|
|
|
if ch == '\n' || ch < 0 || ch == eof {
|
|
|
|
s.err("literal not terminated")
|
2015-10-03 21:20:26 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-10-04 19:01:10 +00:00
|
|
|
if ch == '"' {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2015-10-03 21:20:26 +00:00
|
|
|
if ch == '\\' {
|
2015-10-04 19:01:10 +00:00
|
|
|
s.scanEscape()
|
2015-10-03 21:20:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-10-04 19:01:10 +00:00
|
|
|
// scanEscape scans an escape sequence
|
|
|
|
func (s *Scanner) scanEscape() rune {
|
|
|
|
// http://en.cppreference.com/w/cpp/language/escape
|
|
|
|
ch := s.next() // read character after '/'
|
|
|
|
switch ch {
|
|
|
|
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
|
|
|
|
// nothing to do
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
|
|
|
// octal notation
|
|
|
|
ch = s.scanDigits(ch, 8, 3)
|
|
|
|
case 'x':
|
|
|
|
// hexademical notation
|
|
|
|
ch = s.scanDigits(s.next(), 16, 2)
|
|
|
|
case 'u':
|
|
|
|
// universal character name
|
|
|
|
ch = s.scanDigits(s.next(), 16, 4)
|
|
|
|
case 'U':
|
|
|
|
// universal character name
|
|
|
|
ch = s.scanDigits(s.next(), 16, 8)
|
|
|
|
default:
|
|
|
|
s.err("illegal char escape")
|
|
|
|
}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
|
|
|
// scanDigits scans a rune with the given base for n times. For example an
|
|
|
|
// octan notation \184 would yield in scanDigits(ch, 8, 3)
|
|
|
|
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
|
|
|
|
for n > 0 && digitVal(ch) < base {
|
|
|
|
ch = s.next()
|
|
|
|
n--
|
|
|
|
}
|
|
|
|
if n > 0 {
|
|
|
|
s.err("illegal char escape")
|
|
|
|
}
|
|
|
|
|
|
|
|
// we scanned all digits, put the last non digit char back
|
|
|
|
s.unread()
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
|
|
|
// scanIdentifier scans an identifier and returns the literal string
|
2015-10-03 22:29:13 +00:00
|
|
|
func (s *Scanner) scanIdentifier() string {
|
|
|
|
offs := s.currPos.Offset - s.lastCharLen
|
|
|
|
ch := s.next()
|
|
|
|
for isLetter(ch) || isDigit(ch) {
|
|
|
|
ch = s.next()
|
2015-10-03 18:06:30 +00:00
|
|
|
}
|
2015-10-03 22:29:13 +00:00
|
|
|
s.unread() // we got identifier, put back latest char
|
|
|
|
|
|
|
|
// return string(s.srcBytes[offs:(s.currPos.Offset - s.lastCharLen)])
|
|
|
|
return string(s.srcBytes[offs:s.currPos.Offset])
|
2015-10-03 20:50:50 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 22:35:29 +00:00
|
|
|
// TokenText returns the literal string corresponding to the most recently
|
2015-10-03 20:50:50 +00:00
|
|
|
// scanned token.
|
2015-10-03 22:35:29 +00:00
|
|
|
func (s *Scanner) TokenText() string {
|
2015-10-03 20:50:50 +00:00
|
|
|
if s.tokPos < 0 {
|
|
|
|
// no token text
|
|
|
|
return ""
|
|
|
|
}
|
2015-10-03 16:45:57 +00:00
|
|
|
|
2015-10-03 20:50:50 +00:00
|
|
|
// part of the token text was saved in tokBuf: save the rest in
|
|
|
|
// tokBuf as well and return its content
|
|
|
|
s.tokBuf.Write(s.srcBytes[s.tokPos:s.tokEnd])
|
|
|
|
s.tokPos = s.tokEnd // ensure idempotency of TokenText() call
|
|
|
|
return s.tokBuf.String()
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Pos returns the position of the character immediately after the character or
|
2015-10-03 22:32:45 +00:00
|
|
|
// token returned by the last call to Scan.
|
2015-10-03 18:25:21 +00:00
|
|
|
func (s *Scanner) Pos() Position {
|
2015-10-03 22:32:45 +00:00
|
|
|
return s.currPos
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-04 19:01:10 +00:00
|
|
|
func (s *Scanner) err(msg string) {
|
|
|
|
s.ErrorCount++
|
|
|
|
if s.Error != nil {
|
|
|
|
s.Error(s.currPos, msg)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Fprintf(os.Stderr, "%s: %s\n", s.currPos, msg)
|
|
|
|
}
|
|
|
|
|
2015-10-03 14:08:09 +00:00
|
|
|
// isLetter returns true if the rune is an ASCII letter, an underscore or a
// non-ASCII rune that unicode.IsLetter accepts.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z', ch == '_':
		return true
	case ch >= 0x80:
		return unicode.IsLetter(ch)
	}
	return false
}
|
|
|
|
|
|
|
|
// isDigit returns true if the rune is an ASCII digit or a non-ASCII rune
// that unicode.IsDigit accepts.
func isDigit(ch rune) bool {
	if ch < 0x80 {
		return '0' <= ch && ch <= '9'
	}
	return unicode.IsDigit(ch)
}
|
|
|
|
|
2015-10-04 20:21:34 +00:00
|
|
|
// isDecimal returns true if the rune is one of the ASCII digits '0' to '9'.
func isDecimal(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
|
|
|
|
|
2015-10-03 14:08:09 +00:00
|
|
|
// isWhitespace returns true if the rune is a space, tab, newline or carriage
// return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
|
2015-10-04 19:01:10 +00:00
|
|
|
|
|
|
|
// digitVal returns the numeric value of ch read as a hexadecimal digit in
// either case, or 16 when ch is not such a digit.
func digitVal(ch rune) int {
	if '0' <= ch && ch <= '9' {
		return int(ch - '0')
	}
	if 'a' <= ch && ch <= 'f' {
		return 10 + int(ch-'a')
	}
	if 'A' <= ch && ch <= 'F' {
		return 10 + int(ch-'A')
	}
	return 16 // larger than any legal digit val
}
|