2015-10-06 16:53:56 +00:00
|
|
|
// Package scanner implements a scanner for HCL (HashiCorp Configuration
// Language) source text.
|
2015-10-04 17:16:43 +00:00
|
|
|
package scanner
|
2015-10-03 14:08:09 +00:00
|
|
|
|
|
|
|
import (
|
2015-10-03 16:45:57 +00:00
|
|
|
"bytes"
|
2015-10-04 19:01:10 +00:00
|
|
|
"fmt"
|
|
|
|
"os"
|
2015-10-03 14:08:09 +00:00
|
|
|
"unicode"
|
2015-10-05 15:48:26 +00:00
|
|
|
"unicode/utf8"
|
2015-10-03 14:08:09 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// eof represents a marker rune for the end of the reader. Because it has the
// same value as the NUL character, a literal 0x00 byte in the input cannot be
// told apart from the end of the input.
const eof = rune(0)
|
|
|
|
|
2015-10-04 17:22:37 +00:00
|
|
|
// Scanner defines a lexical scanner
|
2015-10-03 18:25:21 +00:00
|
|
|
type Scanner struct {
|
2015-10-06 16:59:12 +00:00
|
|
|
buf *bytes.Buffer // Source buffer for advancing and scanning
|
|
|
|
src []byte // Source buffer for immutable access
|
2015-10-03 22:29:13 +00:00
|
|
|
|
2015-10-05 14:34:45 +00:00
|
|
|
// Source Position
|
2015-10-07 22:38:39 +00:00
|
|
|
srcPos Pos // current position
|
|
|
|
prevPos Pos // previous position, used for peek() method
|
2015-10-03 22:29:13 +00:00
|
|
|
|
2015-10-05 14:34:45 +00:00
|
|
|
lastCharLen int // length of last character in bytes
|
|
|
|
lastLineLen int // length of last line in characters (for correct column reporting)
|
|
|
|
|
2015-10-07 12:04:34 +00:00
|
|
|
tokStart int // token text start position
|
|
|
|
tokEnd int // token text end position
|
2015-10-04 19:01:10 +00:00
|
|
|
|
|
|
|
// Error is called for each error encountered. If no Error
|
|
|
|
// function is set, the error is reported to os.Stderr.
|
2015-10-07 22:38:39 +00:00
|
|
|
Error func(pos Pos, msg string)
|
2015-10-04 19:01:10 +00:00
|
|
|
|
|
|
|
// ErrorCount is incremented by one for each error encountered.
|
|
|
|
ErrorCount int
|
2015-10-05 14:34:45 +00:00
|
|
|
|
2015-10-06 16:53:56 +00:00
|
|
|
// tokPos is the start position of most recently scanned token; set by
|
|
|
|
// Scan. The Filename field is always left untouched by the Scanner. If
|
|
|
|
// an error is reported (via Error) and Position is invalid, the scanner is
|
|
|
|
// not inside a token.
|
2015-10-07 22:38:39 +00:00
|
|
|
tokPos Pos
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-11 21:20:17 +00:00
|
|
|
// New creates and initializes a new instance of Scanner using src as
|
2015-10-07 09:11:52 +00:00
|
|
|
// its source content.
|
2015-10-11 21:20:17 +00:00
|
|
|
func New(src []byte) *Scanner {
|
2015-10-06 16:59:12 +00:00
|
|
|
// even though we accept a src, we read from a io.Reader compatible type
|
|
|
|
// (*bytes.Buffer). So in the future we might easily change it to streaming
|
|
|
|
// read.
|
2015-10-06 16:53:56 +00:00
|
|
|
b := bytes.NewBuffer(src)
|
2015-10-05 14:34:45 +00:00
|
|
|
s := &Scanner{
|
2015-10-06 16:59:12 +00:00
|
|
|
buf: b,
|
|
|
|
src: src,
|
2015-10-05 14:34:45 +00:00
|
|
|
}
|
|
|
|
|
2015-10-05 14:43:29 +00:00
|
|
|
// srcPosition always starts with 1
|
|
|
|
s.srcPos.Line = 1
|
2015-10-06 16:53:56 +00:00
|
|
|
return s
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 17:32:27 +00:00
|
|
|
// next reads the next rune from the bufferred reader. Returns the rune(0) if
|
2015-10-03 14:08:09 +00:00
|
|
|
// an error occurs (or io.EOF is returned).
|
2015-10-03 18:25:21 +00:00
|
|
|
func (s *Scanner) next() rune {
|
2015-10-06 16:59:12 +00:00
|
|
|
ch, size, err := s.buf.ReadRune()
|
2015-10-03 14:08:09 +00:00
|
|
|
if err != nil {
|
2015-10-05 22:11:02 +00:00
|
|
|
// advance for error reporting
|
|
|
|
s.srcPos.Column++
|
|
|
|
s.srcPos.Offset += size
|
|
|
|
s.lastCharLen = size
|
2015-10-03 14:08:09 +00:00
|
|
|
return eof
|
|
|
|
}
|
2015-10-03 17:32:27 +00:00
|
|
|
|
2015-10-05 15:48:26 +00:00
|
|
|
if ch == utf8.RuneError && size == 1 {
|
|
|
|
s.srcPos.Column++
|
|
|
|
s.srcPos.Offset += size
|
2015-10-05 22:11:02 +00:00
|
|
|
s.lastCharLen = size
|
2015-10-05 15:48:26 +00:00
|
|
|
s.err("illegal UTF-8 encoding")
|
2015-10-05 22:11:02 +00:00
|
|
|
return ch
|
2015-10-05 15:48:26 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 22:29:13 +00:00
|
|
|
// remember last position
|
2015-10-05 14:34:45 +00:00
|
|
|
s.prevPos = s.srcPos
|
2015-10-03 22:29:13 +00:00
|
|
|
|
2015-10-05 15:18:09 +00:00
|
|
|
s.srcPos.Column++
|
2015-10-05 22:11:02 +00:00
|
|
|
s.lastCharLen = size
|
|
|
|
s.srcPos.Offset += size
|
|
|
|
|
2015-10-03 22:29:13 +00:00
|
|
|
if ch == '\n' {
|
2015-10-05 14:34:45 +00:00
|
|
|
s.srcPos.Line++
|
|
|
|
s.lastLineLen = s.srcPos.Column
|
2015-10-05 22:11:02 +00:00
|
|
|
s.srcPos.Column = 0
|
2015-10-03 22:29:13 +00:00
|
|
|
}
|
|
|
|
|
2015-10-05 15:18:09 +00:00
|
|
|
// debug
|
2015-10-05 22:11:02 +00:00
|
|
|
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
|
2015-10-03 22:29:13 +00:00
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2015-10-07 09:11:52 +00:00
|
|
|
// unread unreads the previous read Rune and updates the source position
|
2015-10-03 22:29:13 +00:00
|
|
|
func (s *Scanner) unread() {
|
2015-10-06 16:59:12 +00:00
|
|
|
if err := s.buf.UnreadRune(); err != nil {
|
2015-10-03 22:29:13 +00:00
|
|
|
panic(err) // this is user fault, we should catch it
|
|
|
|
}
|
2015-10-05 14:34:45 +00:00
|
|
|
s.srcPos = s.prevPos // put back last position
|
2015-10-03 22:29:13 +00:00
|
|
|
}
|
2015-10-03 14:08:09 +00:00
|
|
|
|
2015-10-06 16:53:56 +00:00
|
|
|
// peek returns the next rune without advancing the reader.
|
2015-10-03 22:29:13 +00:00
|
|
|
func (s *Scanner) peek() rune {
|
2015-10-06 16:59:12 +00:00
|
|
|
peek, _, err := s.buf.ReadRune()
|
2015-10-03 22:29:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return eof
|
2015-10-03 20:50:50 +00:00
|
|
|
}
|
2015-10-03 14:08:09 +00:00
|
|
|
|
2015-10-06 16:59:12 +00:00
|
|
|
s.buf.UnreadRune()
|
2015-10-03 22:29:13 +00:00
|
|
|
return peek
|
2015-10-03 17:33:51 +00:00
|
|
|
}
|
|
|
|
|
2015-10-03 22:35:29 +00:00
|
|
|
// Scan scans the next token and returns the token.
|
2015-10-07 09:20:35 +00:00
|
|
|
func (s *Scanner) Scan() Token {
|
2015-10-05 15:18:09 +00:00
|
|
|
ch := s.next()
|
2015-10-03 14:08:09 +00:00
|
|
|
|
2015-10-03 16:45:57 +00:00
|
|
|
// skip white space
|
|
|
|
for isWhitespace(ch) {
|
2015-10-03 18:25:21 +00:00
|
|
|
ch = s.next()
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-07 09:20:35 +00:00
|
|
|
var tok TokenType
|
2015-10-07 09:11:52 +00:00
|
|
|
|
2015-10-05 14:34:45 +00:00
|
|
|
// token text markings
|
|
|
|
s.tokStart = s.srcPos.Offset - s.lastCharLen
|
|
|
|
|
2015-10-05 15:18:09 +00:00
|
|
|
// token position, initial next() is moving the offset by one(size of rune
|
|
|
|
// actually), though we are interested with the starting point
|
2015-10-06 16:03:32 +00:00
|
|
|
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
|
2015-10-05 14:34:45 +00:00
|
|
|
if s.srcPos.Column > 0 {
|
|
|
|
// common case: last character was not a '\n'
|
2015-10-06 16:03:32 +00:00
|
|
|
s.tokPos.Line = s.srcPos.Line
|
|
|
|
s.tokPos.Column = s.srcPos.Column
|
2015-10-05 14:34:45 +00:00
|
|
|
} else {
|
|
|
|
// last character was a '\n'
|
|
|
|
// (we cannot be at the beginning of the source
|
|
|
|
// since we have called next() at least once)
|
2015-10-06 16:03:32 +00:00
|
|
|
s.tokPos.Line = s.srcPos.Line - 1
|
|
|
|
s.tokPos.Column = s.lastLineLen
|
2015-10-05 14:34:45 +00:00
|
|
|
}
|
2015-10-03 20:50:50 +00:00
|
|
|
|
2015-10-04 19:53:20 +00:00
|
|
|
switch {
|
|
|
|
case isLetter(ch):
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = IDENT
|
2015-10-03 22:35:29 +00:00
|
|
|
lit := s.scanIdentifier()
|
2015-10-03 22:29:13 +00:00
|
|
|
if lit == "true" || lit == "false" {
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = BOOL
|
2015-10-03 22:29:13 +00:00
|
|
|
}
|
2015-10-04 20:21:34 +00:00
|
|
|
case isDecimal(ch):
|
|
|
|
tok = s.scanNumber(ch)
|
2015-10-04 19:53:20 +00:00
|
|
|
default:
|
|
|
|
switch ch {
|
|
|
|
case eof:
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = EOF
|
2015-10-04 19:53:20 +00:00
|
|
|
case '"':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = STRING
|
2015-10-04 19:53:20 +00:00
|
|
|
s.scanString()
|
2015-10-05 10:26:18 +00:00
|
|
|
case '#', '/':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = COMMENT
|
2015-10-05 10:24:38 +00:00
|
|
|
s.scanComment(ch)
|
2015-10-05 09:31:26 +00:00
|
|
|
case '.':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = PERIOD
|
2015-10-05 10:12:48 +00:00
|
|
|
ch = s.peek()
|
2015-10-05 09:31:26 +00:00
|
|
|
if isDecimal(ch) {
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = FLOAT
|
2015-10-05 09:31:26 +00:00
|
|
|
ch = s.scanMantissa(ch)
|
|
|
|
ch = s.scanExponent(ch)
|
|
|
|
}
|
2015-10-05 10:12:48 +00:00
|
|
|
case '[':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = LBRACK
|
2015-10-05 10:12:48 +00:00
|
|
|
case ']':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = RBRACK
|
2015-10-05 10:12:48 +00:00
|
|
|
case '{':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = LBRACE
|
2015-10-05 10:12:48 +00:00
|
|
|
case '}':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = RBRACE
|
2015-10-05 10:12:48 +00:00
|
|
|
case ',':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = COMMA
|
2015-10-05 10:12:48 +00:00
|
|
|
case '=':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = ASSIGN
|
2015-10-05 10:12:48 +00:00
|
|
|
case '+':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = ADD
|
2015-10-05 10:12:48 +00:00
|
|
|
case '-':
|
2015-10-07 09:20:35 +00:00
|
|
|
tok = SUB
|
2015-10-05 22:11:02 +00:00
|
|
|
default:
|
|
|
|
s.err("illegal char")
|
2015-10-04 19:53:20 +00:00
|
|
|
}
|
2015-10-03 16:45:57 +00:00
|
|
|
}
|
|
|
|
|
2015-10-07 12:04:34 +00:00
|
|
|
// finish token ending
|
2015-10-05 14:34:45 +00:00
|
|
|
s.tokEnd = s.srcPos.Offset
|
2015-10-07 09:20:35 +00:00
|
|
|
|
2015-10-07 12:04:34 +00:00
|
|
|
// create token literal
|
|
|
|
var tokenText string
|
|
|
|
if s.tokStart >= 0 {
|
|
|
|
tokenText = string(s.src[s.tokStart:s.tokEnd])
|
|
|
|
}
|
|
|
|
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
|
|
|
|
|
2015-10-07 09:20:35 +00:00
|
|
|
return Token{
|
2015-10-11 23:27:43 +00:00
|
|
|
Type: tok,
|
|
|
|
Pos: s.tokPos,
|
|
|
|
Text: tokenText,
|
2015-10-06 16:53:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-05 10:24:38 +00:00
|
|
|
func (s *Scanner) scanComment(ch rune) {
|
2015-10-05 10:36:28 +00:00
|
|
|
// single line comments
|
2015-10-05 22:11:02 +00:00
|
|
|
if ch == '#' || (ch == '/' && s.peek() != '*') {
|
2015-10-05 10:24:38 +00:00
|
|
|
ch = s.next()
|
|
|
|
for ch != '\n' && ch >= 0 {
|
|
|
|
ch = s.next()
|
|
|
|
}
|
|
|
|
s.unread()
|
2015-10-05 10:36:28 +00:00
|
|
|
return
|
2015-10-05 10:24:38 +00:00
|
|
|
}
|
2015-10-05 22:11:02 +00:00
|
|
|
|
|
|
|
// be sure we get the character after /* This allows us to find comment's
|
|
|
|
// that are not erminated
|
|
|
|
if ch == '/' {
|
|
|
|
s.next()
|
|
|
|
ch = s.next() // read character after "/*"
|
|
|
|
}
|
|
|
|
|
|
|
|
// look for /* - style comments
|
|
|
|
for {
|
|
|
|
if ch < 0 || ch == eof {
|
|
|
|
s.err("comment not terminated")
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
ch0 := ch
|
|
|
|
ch = s.next()
|
|
|
|
if ch0 == '*' && ch == '/' {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2015-10-05 10:24:38 +00:00
|
|
|
}
|
|
|
|
|
2015-10-04 20:21:34 +00:00
|
|
|
// scanNumber scans a HCL number definition starting with the given rune
|
2015-10-07 09:20:35 +00:00
|
|
|
func (s *Scanner) scanNumber(ch rune) TokenType {
|
2015-10-04 20:21:34 +00:00
|
|
|
if ch == '0' {
|
2015-10-05 08:56:11 +00:00
|
|
|
// check for hexadecimal, octal or float
|
2015-10-04 20:47:06 +00:00
|
|
|
ch = s.next()
|
|
|
|
if ch == 'x' || ch == 'X' {
|
2015-10-05 08:56:11 +00:00
|
|
|
// hexadecimal
|
2015-10-04 20:47:06 +00:00
|
|
|
ch = s.next()
|
2015-10-05 08:56:11 +00:00
|
|
|
found := false
|
|
|
|
for isHexadecimal(ch) {
|
|
|
|
ch = s.next()
|
|
|
|
found = true
|
|
|
|
}
|
|
|
|
|
|
|
|
if !found {
|
|
|
|
s.err("illegal hexadecimal number")
|
|
|
|
}
|
|
|
|
|
2015-10-05 22:11:02 +00:00
|
|
|
if ch != eof {
|
|
|
|
s.unread()
|
|
|
|
}
|
|
|
|
|
2015-10-07 09:20:35 +00:00
|
|
|
return NUMBER
|
2015-10-04 20:47:06 +00:00
|
|
|
}
|
|
|
|
|
2015-10-05 08:56:11 +00:00
|
|
|
// now it's either something like: 0421(octal) or 0.1231(float)
|
|
|
|
illegalOctal := false
|
2015-10-05 09:26:22 +00:00
|
|
|
for isDecimal(ch) {
|
2015-10-05 08:56:11 +00:00
|
|
|
ch = s.next()
|
|
|
|
if ch == '8' || ch == '9' {
|
2015-10-05 09:26:22 +00:00
|
|
|
// this is just a possibility. For example 0159 is illegal, but
|
2015-10-05 09:59:55 +00:00
|
|
|
// 0159.23 is valid. So we mark a possible illegal octal. If
|
|
|
|
// the next character is not a period, we'll print the error.
|
2015-10-05 08:56:11 +00:00
|
|
|
illegalOctal = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-05 14:34:45 +00:00
|
|
|
// literals of form 01e10 are treates as Numbers in HCL, which differs from Go.
|
|
|
|
if ch == 'e' || ch == 'E' {
|
2015-10-05 09:26:22 +00:00
|
|
|
ch = s.scanExponent(ch)
|
2015-10-07 09:20:35 +00:00
|
|
|
return NUMBER
|
2015-10-05 14:34:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if ch == '.' {
|
|
|
|
ch = s.scanFraction(ch)
|
|
|
|
|
|
|
|
if ch == 'e' || ch == 'E' {
|
|
|
|
ch = s.next()
|
|
|
|
ch = s.scanExponent(ch)
|
|
|
|
}
|
2015-10-07 09:20:35 +00:00
|
|
|
return FLOAT
|
2015-10-05 08:56:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if illegalOctal {
|
|
|
|
s.err("illegal octal number")
|
|
|
|
}
|
|
|
|
|
2015-10-05 22:11:02 +00:00
|
|
|
if ch != eof {
|
|
|
|
s.unread()
|
|
|
|
}
|
2015-10-07 09:20:35 +00:00
|
|
|
return NUMBER
|
2015-10-04 20:21:34 +00:00
|
|
|
}
|
|
|
|
|
2015-10-05 14:43:29 +00:00
|
|
|
s.scanMantissa(ch)
|
|
|
|
ch = s.next() // seek forward
|
2015-10-05 14:34:45 +00:00
|
|
|
// literals of form 1e10 are treates as Numbers in HCL, which differs from Go.
|
|
|
|
if ch == 'e' || ch == 'E' {
|
|
|
|
ch = s.scanExponent(ch)
|
2015-10-07 09:20:35 +00:00
|
|
|
return NUMBER
|
2015-10-05 14:34:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if ch == '.' {
|
2015-10-05 09:26:22 +00:00
|
|
|
ch = s.scanFraction(ch)
|
2015-10-05 14:34:45 +00:00
|
|
|
if ch == 'e' || ch == 'E' {
|
|
|
|
ch = s.next()
|
|
|
|
ch = s.scanExponent(ch)
|
|
|
|
}
|
2015-10-07 09:20:35 +00:00
|
|
|
return FLOAT
|
2015-10-05 09:26:22 +00:00
|
|
|
}
|
2015-10-05 14:43:29 +00:00
|
|
|
|
|
|
|
s.unread()
|
2015-10-07 09:20:35 +00:00
|
|
|
return NUMBER
|
2015-10-04 20:21:34 +00:00
|
|
|
}
|
|
|
|
|
2015-10-05 09:59:55 +00:00
|
|
|
// scanMantissa scans the mantissa begining from the rune. It returns the next
|
|
|
|
// non decimal rune. It's used to determine wheter it's a fraction or exponent.
|
|
|
|
func (s *Scanner) scanMantissa(ch rune) rune {
|
|
|
|
scanned := false
|
|
|
|
for isDecimal(ch) {
|
|
|
|
ch = s.next()
|
|
|
|
scanned = true
|
|
|
|
}
|
|
|
|
|
|
|
|
if scanned {
|
|
|
|
s.unread()
|
|
|
|
}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2015-10-06 16:53:56 +00:00
|
|
|
// scanFraction scans the fraction after the '.' rune
|
2015-10-05 09:26:22 +00:00
|
|
|
func (s *Scanner) scanFraction(ch rune) rune {
|
|
|
|
if ch == '.' {
|
2015-10-05 10:03:46 +00:00
|
|
|
ch = s.peek() // we peek just to see if we can move forward
|
|
|
|
ch = s.scanMantissa(ch)
|
2015-10-05 09:26:22 +00:00
|
|
|
}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2015-10-06 16:53:56 +00:00
|
|
|
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
|
|
|
|
// rune.
|
2015-10-05 09:26:22 +00:00
|
|
|
func (s *Scanner) scanExponent(ch rune) rune {
|
|
|
|
if ch == 'e' || ch == 'E' {
|
|
|
|
ch = s.next()
|
|
|
|
if ch == '-' || ch == '+' {
|
|
|
|
ch = s.next()
|
|
|
|
}
|
|
|
|
ch = s.scanMantissa(ch)
|
|
|
|
}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2015-10-04 19:17:59 +00:00
|
|
|
// scanString scans a quoted string
|
2015-10-03 21:20:26 +00:00
|
|
|
func (s *Scanner) scanString() {
|
2015-10-04 19:01:10 +00:00
|
|
|
for {
|
|
|
|
// '"' opening already consumed
|
|
|
|
// read character after quote
|
|
|
|
ch := s.next()
|
|
|
|
|
|
|
|
if ch == '\n' || ch < 0 || ch == eof {
|
|
|
|
s.err("literal not terminated")
|
2015-10-03 21:20:26 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-10-04 19:01:10 +00:00
|
|
|
if ch == '"' {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2015-10-03 21:20:26 +00:00
|
|
|
if ch == '\\' {
|
2015-10-04 19:01:10 +00:00
|
|
|
s.scanEscape()
|
2015-10-03 21:20:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-10-04 19:01:10 +00:00
|
|
|
// scanEscape scans an escape sequence
|
|
|
|
func (s *Scanner) scanEscape() rune {
|
|
|
|
// http://en.cppreference.com/w/cpp/language/escape
|
|
|
|
ch := s.next() // read character after '/'
|
|
|
|
switch ch {
|
|
|
|
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
|
|
|
|
// nothing to do
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
|
|
|
// octal notation
|
|
|
|
ch = s.scanDigits(ch, 8, 3)
|
|
|
|
case 'x':
|
|
|
|
// hexademical notation
|
|
|
|
ch = s.scanDigits(s.next(), 16, 2)
|
|
|
|
case 'u':
|
|
|
|
// universal character name
|
|
|
|
ch = s.scanDigits(s.next(), 16, 4)
|
|
|
|
case 'U':
|
|
|
|
// universal character name
|
|
|
|
ch = s.scanDigits(s.next(), 16, 8)
|
|
|
|
default:
|
|
|
|
s.err("illegal char escape")
|
|
|
|
}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
|
|
|
// scanDigits scans a rune with the given base for n times. For example an
|
|
|
|
// octan notation \184 would yield in scanDigits(ch, 8, 3)
|
|
|
|
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
|
|
|
|
for n > 0 && digitVal(ch) < base {
|
|
|
|
ch = s.next()
|
|
|
|
n--
|
|
|
|
}
|
|
|
|
if n > 0 {
|
|
|
|
s.err("illegal char escape")
|
|
|
|
}
|
|
|
|
|
|
|
|
// we scanned all digits, put the last non digit char back
|
|
|
|
s.unread()
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
|
|
|
// scanIdentifier scans an identifier and returns the literal string
|
2015-10-03 22:29:13 +00:00
|
|
|
func (s *Scanner) scanIdentifier() string {
|
2015-10-05 14:34:45 +00:00
|
|
|
offs := s.srcPos.Offset - s.lastCharLen
|
2015-10-03 22:29:13 +00:00
|
|
|
ch := s.next()
|
|
|
|
for isLetter(ch) || isDigit(ch) {
|
|
|
|
ch = s.next()
|
2015-10-03 18:06:30 +00:00
|
|
|
}
|
2015-10-03 22:29:13 +00:00
|
|
|
s.unread() // we got identifier, put back latest char
|
|
|
|
|
2015-10-06 16:59:12 +00:00
|
|
|
return string(s.src[offs:s.srcPos.Offset])
|
2015-10-03 20:50:50 +00:00
|
|
|
}
|
|
|
|
|
2015-10-06 16:03:32 +00:00
|
|
|
// recentPosition returns the position of the character immediately after the
|
|
|
|
// character or token returned by the last call to Scan.
|
2015-10-07 22:38:39 +00:00
|
|
|
func (s *Scanner) recentPosition() (pos Pos) {
|
2015-10-05 22:11:02 +00:00
|
|
|
pos.Offset = s.srcPos.Offset - s.lastCharLen
|
|
|
|
switch {
|
|
|
|
case s.srcPos.Column > 0:
|
|
|
|
// common case: last character was not a '\n'
|
|
|
|
pos.Line = s.srcPos.Line
|
|
|
|
pos.Column = s.srcPos.Column
|
|
|
|
case s.lastLineLen > 0:
|
|
|
|
// last character was a '\n'
|
|
|
|
// (we cannot be at the beginning of the source
|
|
|
|
// since we have called next() at least once)
|
|
|
|
pos.Line = s.srcPos.Line - 1
|
|
|
|
pos.Column = s.lastLineLen
|
|
|
|
default:
|
|
|
|
// at the beginning of the source
|
|
|
|
pos.Line = 1
|
|
|
|
pos.Column = 1
|
|
|
|
}
|
|
|
|
return
|
2015-10-03 14:08:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-05 15:48:26 +00:00
|
|
|
// err prints the error of any scanning to s.Error function. If the function is
|
|
|
|
// not defined, by default it prints them to os.Stderr
|
2015-10-04 19:01:10 +00:00
|
|
|
func (s *Scanner) err(msg string) {
|
|
|
|
s.ErrorCount++
|
2015-10-06 16:03:32 +00:00
|
|
|
pos := s.recentPosition()
|
2015-10-05 22:11:02 +00:00
|
|
|
|
2015-10-04 19:01:10 +00:00
|
|
|
if s.Error != nil {
|
2015-10-05 22:11:02 +00:00
|
|
|
s.Error(pos, msg)
|
2015-10-04 19:01:10 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-10-05 22:11:02 +00:00
|
|
|
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
|
2015-10-04 19:01:10 +00:00
|
|
|
}
|
|
|
|
|
2015-10-05 15:48:26 +00:00
|
|
|
// isLetter returns true if the given rune is a letter: an ASCII letter, an
// underscore, or a non-ASCII rune that unicode.IsLetter accepts.
func isLetter(ch rune) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}
|
|
|
|
|
2015-10-05 15:48:26 +00:00
|
|
|
// isDigit returns true if the given rune is a decimal digit: an ASCII digit
// or a non-ASCII rune that unicode.IsDigit accepts.
func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
}
|
|
|
|
|
2015-10-05 15:48:26 +00:00
|
|
|
// isDecimal returns true if the given rune is an ASCII decimal digit ('0'
// through '9').
func isDecimal(ch rune) bool {
	return '0' <= ch && ch <= '9'
}
|
|
|
|
|
2015-10-05 15:48:26 +00:00
|
|
|
// isHexadecimal returns true if the given rune is a hexadecimal digit, i.e.
// '0'-'9', 'a'-'f' or 'A'-'F'.
func isHexadecimal(ch rune) bool {
	return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
}
|
|
|
|
|
2015-10-03 14:08:09 +00:00
|
|
|
// isWhitespace returns true if the rune is a space, tab, newline or carriage return
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
|
2015-10-04 19:01:10 +00:00
|
|
|
|
2015-10-05 15:48:26 +00:00
|
|
|
// digitVal returns the integer value of a given octal, decimal or hexadecimal
// rune. For any other rune it returns 16, which is larger than every legal
// digit value and therefore fails a "digitVal(ch) < base" check.
func digitVal(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch - 'a' + 10)
	case 'A' <= ch && ch <= 'F':
		return int(ch - 'A' + 10)
	}
	return 16 // larger than any legal digit val
}
|