hcl/scanner/scanner.go

package scanner

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"unicode"

	"github.com/fatih/hcl/token"
)

// eof is the sentinel rune handed out by the scanner once the source has no
// more runes to offer.
const eof rune = 0

// Scanner defines a lexical scanner. It reads runes from an in-memory copy of
// the source and keeps track of the current position and of the byte range of
// the most recently scanned token.
type Scanner struct {
	// src is the buffer runes are read from; srcBytes is the byte slice
	// obtained from that buffer at construction time and is what token text
	// is sliced out of.
	src      *bytes.Buffer
	srcBytes []byte

	lastCharLen int // length of last character in bytes

	currPos Position // current position
	prevPos Position // previous position (restored by unread)

	tokBuf bytes.Buffer // token text buffer; reset at the start of every Scan
	tokPos int          // token text tail position (srcBytes index); valid if >= 0
	tokEnd int          // token text tail end (srcBytes index)

	// Error is called for each error encountered. If no Error
	// function is set, the error is reported to os.Stderr.
	Error func(pos Position, msg string)

	// ErrorCount is incremented by one for each error encountered.
	ErrorCount int
}

// NewScanner creates a Scanner for the given source. The reader is drained
// completely up front; any read error is returned and no Scanner is created.
func NewScanner(src io.Reader) (*Scanner, error) {
	data, err := ioutil.ReadAll(src)
	if err != nil {
		return nil, err
	}

	s := &Scanner{src: bytes.NewBuffer(data)}
	// Keep a view of the full content so token text can be sliced out later.
	s.srcBytes = s.src.Bytes()
	return s, nil
}

// next reads the next rune from the buffered source and advances the current
// position past it. It returns eof (rune(0)) if the read fails, which includes
// reaching the end of the source; in that case no position is changed.
func (s *Scanner) next() rune {
	ch, size, err := s.src.ReadRune()
	if err != nil {
		// Nothing was consumed. Clear lastCharLen so that callers computing
		// "currPos.Offset - lastCharLen" (e.g. the token start in Scan) do
		// not reach back into the previously read rune.
		s.lastCharLen = 0
		return eof
	}

	// remember last position so a single unread can restore it
	s.prevPos = s.currPos

	s.lastCharLen = size
	s.currPos.Offset += size
	s.currPos.Column += size // column advances by the rune's byte length

	if ch == '\n' {
		s.currPos.Line++
		s.currPos.Column = 0
	}

	return ch
}

// unread pushes the most recently read rune back onto the source and restores
// the position recorded before it was read. It panics if the underlying
// buffer refuses to unread.
func (s *Scanner) unread() {
	err := s.src.UnreadRune()
	if err != nil {
		panic(err) // this is user fault, we should catch it
	}

	// roll the position back to where it was before the last read
	s.currPos = s.prevPos
}

// peek reports the upcoming rune without consuming it and without touching
// the recorded positions. It yields eof when no rune can be read.
func (s *Scanner) peek() rune {
	r, _, err := s.src.ReadRune()
	if err == nil {
		// put the rune straight back so the next read sees it again
		s.src.UnreadRune()
		return r
	}
	return eof
}

// Scan scans the next token and returns the token. Leading whitespace is
// skipped first. The byte range of the token is recorded so its text can be
// fetched with TokenText afterwards. A rune that does not start any of the
// tokens handled below leaves tok at its zero value.
func (s *Scanner) Scan() (tok token.Token) {
	ch := s.next()

	// skip white space
	for isWhitespace(ch) {
		ch = s.next()
	}

	// start the token position at the first byte of the rune just read
	s.tokBuf.Reset()
	s.tokPos = s.currPos.Offset - s.lastCharLen

	switch {
	case isLetter(ch):
		tok = token.IDENT
		lit := s.scanIdentifier()
		if lit == "true" || lit == "false" {
			tok = token.BOOL
		}
	case isDecimal(ch):
		tok = s.scanNumber(ch)
	default:
		switch ch {
		case eof:
			tok = token.EOF
		case '"':
			tok = token.STRING
			s.scanString()
		case '#', '/':
			tok = token.COMMENT
			s.scanComment(ch)
		case '.':
			// a lone period, unless digits follow (".5", ".5e3")
			tok = token.PERIOD
			ch = s.peek()
			if isDecimal(ch) {
				tok = token.FLOAT
				// scanMantissa consumes the digits and returns the rune
				// following them, which is still unread at this point.
				ch = s.scanMantissa(ch)
				if ch == 'e' || ch == 'E' {
					// scanExponent expects the exponent marker to have been
					// consumed already, so read it before handing it over.
					s.scanExponent(s.next())
				}
			}
		case '[':
			tok = token.LBRACK
		case ']':
			tok = token.RBRACK
		case '{':
			tok = token.LBRACE
		case '}':
			tok = token.RBRACE
		case ',':
			tok = token.COMMA
		case '=':
			tok = token.ASSIGN
		case '+':
			tok = token.ADD
		case '-':
			tok = token.SUB
		}
	}

	// the token ends where the scanner currently stands
	s.tokEnd = s.currPos.Offset
	return tok
}

// scanComment scans a comment whose first rune, ch, has already been
// consumed. A '/' followed by '*' starts a block comment that runs up to and
// including the closing "*/"; otherwise '#' or '/' starts a line comment that
// runs up to, but not including, the next newline. Any other ch is ignored.
func (s *Scanner) scanComment(ch rune) {
	// look for /* - style comments
	if ch == '/' && s.peek() == '*' {
		// Consume the '*' of the opener before searching for the closer, so
		// that "/*/" is not mistaken for a complete comment.
		s.next()
		ch = s.next() // first rune of the comment body

		for {
			// eof is rune(0), so it has to be compared explicitly; a "< 0"
			// test never fires and the loop would never end.
			if ch == eof {
				s.err("comment not terminated")
				return
			}

			prev := ch
			ch = s.next()
			if prev == '*' && ch == '/' {
				// Block comment is complete. Return here so the rest of the
				// line is not swallowed as a line comment.
				return
			}
		}
	}

	// single line comments
	if ch != '#' && ch != '/' {
		return
	}

	ch = s.next()
	for ch != '\n' && ch != eof {
		ch = s.next()
	}

	// Leave the newline for the next Scan. At the end of the source nothing
	// was read, so there is nothing to put back.
	if ch != eof {
		s.unread()
	}
}

// scanNumber scans a HCL number definition starting with the given rune,
// which has already been consumed. It returns token.FLOAT if the number has a
// fraction or an exponent and token.NUMBER otherwise (decimal, octal or
// hexadecimal). Malformed hexadecimal and octal numbers are reported through
// err but still yield token.NUMBER.
func (s *Scanner) scanNumber(ch rune) token.Token {
	if ch == '0' {
		// check for hexadecimal, octal or float
		ch = s.next()
		if ch == 'x' || ch == 'X' {
			// hexadecimal
			ch = s.next()
			found := false
			for isHexadecimal(ch) {
				ch = s.next()
				found = true
			}
			// put back the rune that ended the number; at the end of the
			// source nothing was read, so there is nothing to put back
			if ch != eof {
				s.unread()
			}

			if !found {
				s.err("illegal hexadecimal number")
			}

			return token.NUMBER
		}

		// now it's either something like: 0421(octal) or 0.1231(float)
		illegalOctal := false
		for isDecimal(ch) {
			// Inspect the digit before advancing so that the very first
			// digit after the leading zero is checked as well ("08").
			if ch == '8' || ch == '9' {
				// this is just a possibility. For example 0159 is illegal, but
				// 0159.23 is valid. So we mark a possible illegal octal. If
				// the number turns out not to be a float, we report the error.
				illegalOctal = true
			}
			ch = s.next()
		}
		if ch != eof {
			s.unread()
		}

		if ch == '.' || ch == 'e' || ch == 'E' {
			s.scanFloatTail()
			return token.FLOAT
		}

		if illegalOctal {
			s.err("illegal octal number")
		}

		return token.NUMBER
	}

	// scanMantissa returns the rune following the digits without consuming it
	ch = s.scanMantissa(ch)
	if ch == '.' || ch == 'e' || ch == 'E' {
		s.scanFloatTail()
		return token.FLOAT
	}
	return token.NUMBER
}

// scanFloatTail consumes the fraction and/or exponent of a float. The scanner
// must be positioned right before the '.', 'e' or 'E' that follows the
// integer part.
func (s *Scanner) scanFloatTail() {
	ch := s.next() // consume '.', 'e' or 'E'
	if ch == '.' {
		// scanFraction returns the rune after the fraction digits, which is
		// still unread.
		ch = s.scanFraction(ch)
		if ch != 'e' && ch != 'E' {
			return
		}
		// scanExponent expects the exponent marker to be consumed already
		ch = s.next()
	}
	s.scanExponent(ch)
}

// scanMantissa scans the mantissa beginning from the rune. It returns the
// next non decimal rune. It's used to determine whether it's a fraction or
// exponent. If at least one digit was scanned, the returned rune is pushed
// back so that it is read again by the next call to next.
func (s *Scanner) scanMantissa(ch rune) rune {
	scanned := false
	for isDecimal(ch) {
		ch = s.next()
		scanned = true
	}

	// At the end of the source next did not read anything, so there is
	// nothing to push back; calling unread there would panic.
	if scanned && ch != eof {
		s.unread()
	}
	return ch
}

// scanFraction scans the digits that follow a decimal point. If ch is not a
// '.', it is returned untouched; otherwise the result of scanning the
// mantissa after the point is returned.
func (s *Scanner) scanFraction(ch rune) rune {
	if ch != '.' {
		return ch
	}

	// Only peek at the rune after the point: scanMantissa does the actual
	// reading if it turns out to be a digit.
	return s.scanMantissa(s.peek())
}

// scanExponent scans the exponent part of a float. ch is the exponent marker
// ('e' or 'E') and must already have been consumed; any other ch is returned
// untouched. An optional sign and the exponent digits are consumed. The
// returned rune is the one following the exponent and is left unread.
func (s *Scanner) scanExponent(ch rune) rune {
	if ch != 'e' && ch != 'E' {
		return ch
	}

	ch = s.next()
	if ch == '-' || ch == '+' {
		ch = s.next()
	}

	if !isDecimal(ch) {
		// No exponent digits follow. ch was only read as lookahead, so give
		// it back instead of silently swallowing it into the number. At the
		// end of the source nothing was read and nothing can be put back.
		if ch != eof {
			s.unread()
		}
		return ch
	}

	return s.scanMantissa(ch)
}

// scanString consumes the remainder of a double-quoted string. The opening
// quote has been read by the caller already. Scanning stops after the closing
// quote; a newline or the end of the source before that is reported as an
// unterminated literal.
func (s *Scanner) scanString() {
	for {
		switch ch := s.next(); {
		case ch == '\n' || ch < 0 || ch == eof:
			s.err("literal not terminated")
			return
		case ch == '"':
			// closing quote, string is complete
			return
		case ch == '\\':
			s.scanEscape()
		}
	}
}

// scanEscape consumes an escape sequence; the introducing backslash has been
// read by the caller. Simple escapes are a single rune, numeric escapes are
// delegated to scanDigits with the matching base and digit count. Anything
// else is reported as an illegal escape.
func (s *Scanner) scanEscape() rune {
	// http://en.cppreference.com/w/cpp/language/escape
	esc := s.next() // the rune right after the backslash

	var base, digits int
	switch esc {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
		// single-rune escape, already fully consumed
		return esc
	case '0', '1', '2', '3', '4', '5', '6', '7':
		// octal notation: the rune just read is the first digit
		return s.scanDigits(esc, 8, 3)
	case 'x':
		// hexadecimal notation
		base, digits = 16, 2
	case 'u':
		// universal character name, short form
		base, digits = 16, 4
	case 'U':
		// universal character name, long form
		base, digits = 16, 8
	default:
		s.err("illegal char escape")
		return esc
	}

	// for x/u/U the digits start with the rune after the marker
	return s.scanDigits(s.next(), base, digits)
}

// scanDigits scans up to n digits of the given base, starting with ch, which
// has already been consumed. For example the octal notation \123 results in
// scanDigits(ch, 8, 3). An error is reported if fewer than n digits are
// found. The rune following the scanned digits is pushed back and returned.
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
	for n > 0 && digitVal(ch) < base {
		ch = s.next()
		n--
	}
	if n > 0 {
		s.err("illegal char escape")
	}

	// Put the last rune read back, since it is not part of the digits. At
	// the end of the source nothing was read, so there is nothing to put
	// back; calling unread there would panic.
	if ch != eof {
		s.unread()
	}
	return ch
}

// scanIdentifier scans an identifier and returns the literal string. The
// first rune of the identifier has already been consumed by the caller; the
// returned string includes it.
func (s *Scanner) scanIdentifier() string {
	// offset of the first rune, which was read just before this call
	offs := s.currPos.Offset - s.lastCharLen

	ch := s.next()
	for isLetter(ch) || isDigit(ch) {
		ch = s.next()
	}

	// We got the identifier, put back the rune that ended it. At the end of
	// the source nothing was read, so there is nothing to put back; calling
	// unread there would panic.
	if ch != eof {
		s.unread()
	}

	return string(s.srcBytes[offs:s.currPos.Offset])
}

// TokenText returns the text of the token produced by the latest Scan, or the
// empty string if no token range is recorded.
func (s *Scanner) TokenText() string {
	if s.tokPos >= 0 {
		// Append whatever part of the token is not in tokBuf yet, then move
		// tokPos to the end so that repeated calls do not append it again.
		tail := s.srcBytes[s.tokPos:s.tokEnd]
		s.tokBuf.Write(tail)
		s.tokPos = s.tokEnd
		return s.tokBuf.String()
	}

	// no token text
	return ""
}

// Pos reports the scanner's current position, i.e. the position right after
// the last rune consumed so far.
func (s *Scanner) Pos() Position {
	pos := s.currPos
	return pos
}

// err records a scanning error at the current position. The error counter is
// always bumped; the message goes to the Error callback when one is set and
// to os.Stderr otherwise.
func (s *Scanner) err(msg string) {
	s.ErrorCount++

	if s.Error == nil {
		fmt.Fprintf(os.Stderr, "%s: %s\n", s.currPos, msg)
		return
	}
	s.Error(s.currPos, msg)
}

// isLetter reports whether ch may appear in an identifier as a letter: an
// ASCII letter, an underscore, or a non-ASCII rune classified as a letter by
// the unicode package.
func isLetter(ch rune) bool {
	switch {
	case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z', ch == '_':
		return true
	case ch < 0x80:
		// remaining ASCII (and negative values) are never letters
		return false
	}
	return unicode.IsLetter(ch)
}

// isDigit reports whether ch is an ASCII decimal digit or a non-ASCII rune
// classified as a digit by the unicode package.
func isDigit(ch rune) bool {
	if ch < 0x80 {
		return ch >= '0' && ch <= '9'
	}
	return unicode.IsDigit(ch)
}

// isOctal reports whether ch is one of the ASCII digits 0 through 7.
func isOctal(ch rune) bool {
	switch ch {
	case '0', '1', '2', '3', '4', '5', '6', '7':
		return true
	}
	return false
}

// isDecimal reports whether ch is one of the ASCII digits 0 through 9.
func isDecimal(ch rune) bool {
	if ch < '0' || ch > '9' {
		return false
	}
	return true
}

// isHexadecimal reports whether ch is an ASCII hexadecimal digit, in either
// letter case.
func isHexadecimal(ch rune) bool {
	switch {
	case ch >= '0' && ch <= '9':
		return true
	case ch >= 'a' && ch <= 'f':
		return true
	case ch >= 'A' && ch <= 'F':
		return true
	}
	return false
}

// isWhitespace reports whether ch is a space, a tab, a newline or a carriage
// return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}

// digitVal returns the numeric value of the hexadecimal digit ch (0-15), or
// 16 if ch is not a hexadecimal digit.
func digitVal(ch rune) int {
	if '0' <= ch && ch <= '9' {
		return int(ch - '0')
	}
	if 'a' <= ch && ch <= 'f' {
		return int(ch-'a') + 10
	}
	if 'A' <= ch && ch <= 'F' {
		return int(ch-'A') + 10
	}
	// larger than any legal digit val
	return 16
}
hcl: split up package for more control 2015-10-04 17:16:43 +00:00			`package scanner`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00
			`import (`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`"bytes"`
scanner: implement string scanning 2015-10-04 19:01:10 +00:00			`"fmt"`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`"io"`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`"io/ioutil"`
scanner: implement string scanning 2015-10-04 19:01:10 +00:00			`"os"`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`"unicode"`
scanner: use new hcl/token package 2015-10-04 17:19:39 +00:00
			`"github.com/fatih/hcl/token"`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`)`

			`// eof represents a marker rune for the end of the reader.`
			`const eof = rune(0)`

scanner: small fixes 2015-10-04 17:22:37 +00:00			`// Scanner defines a lexical scanner`
lexer: scanner is more Go idiomatic 2015-10-03 18:25:21 +00:00			`type Scanner struct {`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`src *bytes.Buffer`
			`srcBytes []byte`

lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`lastCharLen int // length of last character in bytes`

			`currPos Position // current position`
			`prevPos Position // previous position`

			`tokBuf bytes.Buffer // token text buffer`
			`tokPos int // token text tail position (srcBuf index); valid if >= 0`
			`tokEnd int // token text tail end (srcBuf index)`
scanner: implement string scanning 2015-10-04 19:01:10 +00:00
			`// Error is called for each error encountered. If no Error`
			`// function is set, the error is reported to os.Stderr.`
			`Error func(pos Position, msg string)`

			`// ErrorCount is incremented by one for each error encountered.`
			`ErrorCount int`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`

scanner: small fixes 2015-10-04 17:22:37 +00:00			`// NewScanner returns a new instance of Lexer. Even though src is an io.Reader,`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`// we fully consume the content.`
scanner: small fixes 2015-10-04 17:22:37 +00:00			`func NewScanner(src io.Reader) (*Scanner, error) {`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`buf, err := ioutil.ReadAll(src)`
			`if err != nil {`
			`return nil, err`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`
lexer: implement positions 2015-10-03 20:50:50 +00:00
			`b := bytes.NewBuffer(buf)`
			`return &Scanner{`
			`src: b,`
			`srcBytes: b.Bytes(),`
			`}, nil`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`

lexer: back to the roots 2015-10-03 17:32:27 +00:00			`// next reads the next rune from the bufferred reader. Returns the rune(0) if`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`// an error occurs (or io.EOF is returned).`
lexer: scanner is more Go idiomatic 2015-10-03 18:25:21 +00:00			`func (s *Scanner) next() rune {`
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`ch, size, err := s.src.ReadRune()`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`if err != nil {`
			`return eof`
			`}`
lexer: back to the roots 2015-10-03 17:32:27 +00:00
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`// remember last position`
			`s.prevPos = s.currPos`

lexer: implement positions 2015-10-03 20:50:50 +00:00			`s.lastCharLen = size`
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`s.currPos.Offset += size`
			`s.currPos.Column += size`

			`if ch == '\n' {`
			`s.currPos.Line++`
			`s.currPos.Column = 0`
			`}`

			`return ch`
			`}`

			`func (s *Scanner) unread() {`
			`if err := s.src.UnreadRune(); err != nil {`
			`panic(err) // this is user fault, we should catch it`
			`}`
			`s.currPos = s.prevPos // put back last position`
			`}`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`func (s *Scanner) peek() rune {`
			`peek, _, err := s.src.ReadRune()`
			`if err != nil {`
			`return eof`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`}`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`s.src.UnreadRune()`
			`return peek`
lexer: add peek() method 2015-10-03 17:33:51 +00:00			`}`

parser: more idiomatic call 2015-10-03 22:35:29 +00:00			`// Scan scans the next token and returns the token.`
scanner: use new hcl/token package 2015-10-04 17:19:39 +00:00			`func (s *Scanner) Scan() (tok token.Token) {`
lexer: scanner is more Go idiomatic 2015-10-03 18:25:21 +00:00			`ch := s.next()`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`// skip white space`
			`for isWhitespace(ch) {`
lexer: scanner is more Go idiomatic 2015-10-03 18:25:21 +00:00			`ch = s.next()`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`

lexer: implement positions 2015-10-03 20:50:50 +00:00			`// start the token position`
			`s.tokBuf.Reset()`
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`s.tokPos = s.currPos.Offset - s.lastCharLen`
lexer: implement positions 2015-10-03 20:50:50 +00:00
scanner: organize Scan() so it's easier to read 2015-10-04 19:53:20 +00:00			`switch {`
			`case isLetter(ch):`
scanner: use new hcl/token package 2015-10-04 17:19:39 +00:00			`tok = token.IDENT`
parser: more idiomatic call 2015-10-03 22:35:29 +00:00			`lit := s.scanIdentifier()`
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`if lit == "true" \|\| lit == "false" {`
scanner: use new hcl/token package 2015-10-04 17:19:39 +00:00			`tok = token.BOOL`
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`}`
scanner: initial number lexing 2015-10-04 20:21:34 +00:00			`case isDecimal(ch):`
			`tok = s.scanNumber(ch)`
scanner: organize Scan() so it's easier to read 2015-10-04 19:53:20 +00:00			`default:`
			`switch ch {`
			`case eof:`
			`tok = token.EOF`
			`case '"':`
			`tok = token.STRING`
			`s.scanString()`
scanner: // style comments are implemented too 2015-10-05 10:26:18 +00:00			`case '#', '/':`
scanner: # style line comment scanning implemented 2015-10-05 10:24:38 +00:00			`tok = token.COMMENT`
			`s.scanComment(ch)`
scanner: parse floats in form of .9 , .123 2015-10-05 09:31:26 +00:00			`case '.':`
scanner: implement comments 2015-10-05 10:36:28 +00:00			`tok = token.PERIOD`
scanner: implement remaning tokens 2015-10-05 10:12:48 +00:00			`ch = s.peek()`
scanner: parse floats in form of .9 , .123 2015-10-05 09:31:26 +00:00			`if isDecimal(ch) {`
			`tok = token.FLOAT`
			`ch = s.scanMantissa(ch)`
			`ch = s.scanExponent(ch)`
			`}`
scanner: implement remaning tokens 2015-10-05 10:12:48 +00:00			`case '[':`
			`tok = token.LBRACK`
			`case ']':`
			`tok = token.RBRACK`
			`case '{':`
			`tok = token.LBRACE`
			`case '}':`
			`tok = token.RBRACE`
			`case ',':`
			`tok = token.COMMA`
			`case '=':`
			`tok = token.ASSIGN`
			`case '+':`
			`tok = token.ADD`
			`case '-':`
			`tok = token.SUB`
scanner: organize Scan() so it's easier to read 2015-10-04 19:53:20 +00:00			`}`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00			`}`

lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`s.tokEnd = s.currPos.Offset`
parser: more idiomatic call 2015-10-03 22:35:29 +00:00			`return tok`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`}`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00
scanner: # style line comment scanning implemented 2015-10-05 10:24:38 +00:00			`func (s *Scanner) scanComment(ch rune) {`
scanner: implement comments 2015-10-05 10:36:28 +00:00			`// look for /* - style comments`
			`if ch == '/' && s.peek() == '*' {`
			`for {`
			`if ch < 0 {`
			`s.err("comment not terminated")`
			`break`
			`}`

			`ch0 := ch`
			`ch = s.next()`
			`if ch0 == '*' && ch == '/' {`
			`break`
			`}`
			`}`
			`}`

			`// single line comments`
scanner: // style comments are implemented too 2015-10-05 10:26:18 +00:00			`if ch == '#' \|\| ch == '/' {`
scanner: # style line comment scanning implemented 2015-10-05 10:24:38 +00:00			`ch = s.next()`
			`for ch != '\n' && ch >= 0 {`
			`ch = s.next()`
			`}`
			`s.unread()`
scanner: implement comments 2015-10-05 10:36:28 +00:00			`return`
scanner: # style line comment scanning implemented 2015-10-05 10:24:38 +00:00			`}`
			`}`

scanner: initial number lexing 2015-10-04 20:21:34 +00:00			`// scanNumber scans a HCL number definition starting with the given rune`
			`func (s *Scanner) scanNumber(ch rune) token.Token {`
			`if ch == '0' {`
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`// check for hexadecimal, octal or float`
scnaner: implement scanning hexadecimal numbers 2015-10-04 20:47:06 +00:00			`ch = s.next()`
			`if ch == 'x' \|\| ch == 'X' {`
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`// hexadecimal`
scnaner: implement scanning hexadecimal numbers 2015-10-04 20:47:06 +00:00			`ch = s.next()`
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`found := false`
			`for isHexadecimal(ch) {`
			`ch = s.next()`
			`found = true`
			`}`
			`s.unread()`

			`if !found {`
			`s.err("illegal hexadecimal number")`
			`}`

scnaner: implement scanning hexadecimal numbers 2015-10-04 20:47:06 +00:00			`return token.NUMBER`
			`}`

scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`// now it's either something like: 0421(octal) or 0.1231(float)`
			`illegalOctal := false`
scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00			`for isDecimal(ch) {`
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`ch = s.next()`
			`if ch == '8' \|\| ch == '9' {`
scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00			`// this is just a possibility. For example 0159 is illegal, but`
scanner: finalize float scanning 2015-10-05 09:59:55 +00:00			`// 0159.23 is valid. So we mark a possible illegal octal. If`
			`// the next character is not a period, we'll print the error.`
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`illegalOctal = true`
scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`}`
scanner: finalize float scanning 2015-10-05 09:59:55 +00:00
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`}`
			`s.unread()`

			`if ch == '.' \|\| ch == 'e' \|\| ch == 'E' {`
scanner: finalize float scanning 2015-10-05 09:59:55 +00:00			`ch = s.next()`
scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00			`ch = s.scanFraction(ch)`
			`ch = s.scanExponent(ch)`
scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`return token.FLOAT`
			`}`

			`if illegalOctal {`
			`s.err("illegal octal number")`
			`}`

			`return token.NUMBER`
scanner: initial number lexing 2015-10-04 20:21:34 +00:00			`}`

scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00			`ch = s.scanMantissa(ch)`
			`if ch == '.' \|\| ch == 'e' \|\| ch == 'E' {`
scanner: finalize float scanning 2015-10-05 09:59:55 +00:00			`ch = s.next() // seek forward`
scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00			`ch = s.scanFraction(ch)`
			`ch = s.scanExponent(ch)`
			`return token.FLOAT`
			`}`
scanner: initial number lexing 2015-10-04 20:21:34 +00:00			`return token.NUMBER`
			`}`

scanner: finalize float scanning 2015-10-05 09:59:55 +00:00			`// scanMantissa scans the mantissa begining from the rune. It returns the next`
			`// non decimal rune. It's used to determine wheter it's a fraction or exponent.`
			`func (s *Scanner) scanMantissa(ch rune) rune {`
			`scanned := false`
			`for isDecimal(ch) {`
			`ch = s.next()`
			`scanned = true`
			`}`

			`if scanned {`
			`s.unread()`
			`}`
			`return ch`
			`}`

scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00			`func (s *Scanner) scanFraction(ch rune) rune {`
			`if ch == '.' {`
scanner: peek instead of next. 2015-10-05 10:03:46 +00:00			`ch = s.peek() // we peek just to see if we can move forward`
			`ch = s.scanMantissa(ch)`
scanner: parse a set of fractions 2015-10-05 09:26:22 +00:00			`}`
			`return ch`
			`}`

			`func (s *Scanner) scanExponent(ch rune) rune {`
			`if ch == 'e' \|\| ch == 'E' {`
			`ch = s.next()`
			`if ch == '-' \|\| ch == '+' {`
			`ch = s.next()`
			`}`
			`ch = s.scanMantissa(ch)`
			`}`
			`return ch`
			`}`

scanner: reuse tests code 2015-10-04 19:17:59 +00:00			`// scanString scans a quoted string`
lexer: scan strings 2015-10-03 21:20:26 +00:00			`func (s *Scanner) scanString() {`
scanner: implement string scanning 2015-10-04 19:01:10 +00:00			`for {`
			`// '"' opening already consumed`
			`// read character after quote`
			`ch := s.next()`

			`if ch == '\n' \|\| ch < 0 \|\| ch == eof {`
			`s.err("literal not terminated")`
lexer: scan strings 2015-10-03 21:20:26 +00:00			`return`
			`}`

scanner: implement string scanning 2015-10-04 19:01:10 +00:00			`if ch == '"' {`
			`break`
			`}`

lexer: scan strings 2015-10-03 21:20:26 +00:00			`if ch == '\\' {`
scanner: implement string scanning 2015-10-04 19:01:10 +00:00			`s.scanEscape()`
lexer: scan strings 2015-10-03 21:20:26 +00:00			`}`
			`}`

			`return`
			`}`

scanner: implement string scanning 2015-10-04 19:01:10 +00:00			`// scanEscape scans an escape sequence`
			`func (s *Scanner) scanEscape() rune {`
			`// http://en.cppreference.com/w/cpp/language/escape`
			`ch := s.next() // read character after '/'`
			`switch ch {`
			`case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':`
			`// nothing to do`
			`case '0', '1', '2', '3', '4', '5', '6', '7':`
			`// octal notation`
			`ch = s.scanDigits(ch, 8, 3)`
			`case 'x':`
			`// hexademical notation`
			`ch = s.scanDigits(s.next(), 16, 2)`
			`case 'u':`
			`// universal character name`
			`ch = s.scanDigits(s.next(), 16, 4)`
			`case 'U':`
			`// universal character name`
			`ch = s.scanDigits(s.next(), 16, 8)`
			`default:`
			`s.err("illegal char escape")`
			`}`
			`return ch`
			`}`

			`// scanDigits scans a rune with the given base for n times. For example an`
			`// octan notation \184 would yield in scanDigits(ch, 8, 3)`
			`func (s *Scanner) scanDigits(ch rune, base, n int) rune {`
			`for n > 0 && digitVal(ch) < base {`
			`ch = s.next()`
			`n--`
			`}`
			`if n > 0 {`
			`s.err("illegal char escape")`
			`}`

			`// we scanned all digits, put the last non digit char back`
			`s.unread()`
			`return ch`
			`}`

			`// scanIdentifier scans an identifier and returns the literal string`
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`func (s *Scanner) scanIdentifier() string {`
			`offs := s.currPos.Offset - s.lastCharLen`
			`ch := s.next()`
			`for isLetter(ch) \|\| isDigit(ch) {`
			`ch = s.next()`
parser: add scanning ident test 2015-10-03 18:06:30 +00:00			`}`
lexer: more robust implementation 2015-10-03 22:29:13 +00:00			`s.unread() // we got identifier, put back latest char`

			`// return string(s.srcBytes[offs:(s.currPos.Offset - s.lastCharLen)])`
			`return string(s.srcBytes[offs:s.currPos.Offset])`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`}`

parser: more idiomatic call 2015-10-03 22:35:29 +00:00			`// TokenText returns the literal string corresponding to the most recently`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`// scanned token.`
parser: more idiomatic call 2015-10-03 22:35:29 +00:00			`func (s *Scanner) TokenText() string {`
lexer: implement positions 2015-10-03 20:50:50 +00:00			`if s.tokPos < 0 {`
			`// no token text`
			`return ""`
			`}`
lexer: various changes, trying text/scanner 2015-10-03 16:45:57 +00:00
lexer: implement positions 2015-10-03 20:50:50 +00:00			`// part of the token text was saved in tokBuf: save the rest in`
			`// tokBuf as well and return its content`
			`s.tokBuf.Write(s.srcBytes[s.tokPos:s.tokEnd])`
			`s.tokPos = s.tokEnd // ensure idempotency of TokenText() call`
			`return s.tokBuf.String()`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`

			`// Pos returns the position of the character immediately after the character or`
lexer: fix Position() call 2015-10-03 22:32:45 +00:00			`// token returned by the last call to Scan.`
lexer: scanner is more Go idiomatic 2015-10-03 18:25:21 +00:00			`func (s *Scanner) Pos() Position {`
lexer: fix Position() call 2015-10-03 22:32:45 +00:00			`return s.currPos`
parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`}`

scanner: implement string scanning 2015-10-04 19:01:10 +00:00			`func (s *Scanner) err(msg string) {`
			`s.ErrorCount++`
			`if s.Error != nil {`
			`s.Error(s.currPos, msg)`
			`return`
			`}`

			`fmt.Fprintf(os.Stderr, "%s: %s\n", s.currPos, msg)`
			`}`

parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`func isLetter(ch rune) bool {`
			`return 'a' <= ch && ch <= 'z' \|\| 'A' <= ch && ch <= 'Z' \|\| ch == '_' \|\| ch >= 0x80 && unicode.IsLetter(ch)`
			`}`

			`func isDigit(ch rune) bool {`
			`return '0' <= ch && ch <= '9' \|\| ch >= 0x80 && unicode.IsDigit(ch)`
			`}`

scanner: implement parsing octals 2015-10-05 08:56:11 +00:00			`func isOctal(ch rune) bool {`
			`return '0' <= ch && ch <= '7'`
			`}`

scanner: initial number lexing 2015-10-04 20:21:34 +00:00			`func isDecimal(ch rune) bool {`
			`return '0' <= ch && ch <= '9'`
			`}`

scnaner: implement scanning hexadecimal numbers 2015-10-04 20:47:06 +00:00			`func isHexadecimal(ch rune) bool {`
			`return '0' <= ch && ch <= '9' \|\| 'a' <= ch && ch <= 'f' \|\| 'A' <= ch && ch <= 'F'`
			`}`

parser: initial lexer next method 2015-10-03 14:08:09 +00:00			`// isWhitespace returns true if the rune is a space, tab, newline or carriage return`
			`func isWhitespace(ch rune) bool {`
			`return ch == ' ' \|\| ch == '\t' \|\| ch == '\n' \|\| ch == '\r'`
			`}`
scanner: implement string scanning 2015-10-04 19:01:10 +00:00
			`func digitVal(ch rune) int {`
			`switch {`
			`case '0' <= ch && ch <= '9':`
			`return int(ch - '0')`
			`case 'a' <= ch && ch <= 'f':`
			`return int(ch - 'a' + 10)`
			`case 'A' <= ch && ch <= 'F':`
			`return int(ch - 'A' + 10)`
			`}`
			`return 16 // larger than any legal digit val`
			`}`