scanner: implement string scanning
This commit is contained in:
parent
94bd4afe4d
commit
1f011b4e82
@ -2,9 +2,10 @@ package scanner
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"os"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
||||||
"github.com/fatih/hcl/token"
|
"github.com/fatih/hcl/token"
|
||||||
@ -26,6 +27,13 @@ type Scanner struct {
|
|||||||
tokBuf bytes.Buffer // token text buffer
|
tokBuf bytes.Buffer // token text buffer
|
||||||
tokPos int // token text tail position (srcBuf index); valid if >= 0
|
tokPos int // token text tail position (srcBuf index); valid if >= 0
|
||||||
tokEnd int // token text tail end (srcBuf index)
|
tokEnd int // token text tail end (srcBuf index)
|
||||||
|
|
||||||
|
// Error is called for each error encountered. If no Error
|
||||||
|
// function is set, the error is reported to os.Stderr.
|
||||||
|
Error func(pos Position, msg string)
|
||||||
|
|
||||||
|
// ErrorCount is incremented by one for each error encountered.
|
||||||
|
ErrorCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewScanner returns a new instance of Scanner. Even though src is an io.Reader,
|
// NewScanner returns a new instance of Scanner. Even though src is an io.Reader,
|
||||||
@ -122,25 +130,70 @@ func (s *Scanner) Scan() (tok token.Token) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scanner) scanString() {
|
func (s *Scanner) scanString() {
|
||||||
// '"' opening already consumed
|
for {
|
||||||
ch := s.next() // read character after quote
|
// '"' opening already consumed
|
||||||
for ch != '"' {
|
// read character after quote
|
||||||
if ch == '\n' || ch < 0 {
|
ch := s.next()
|
||||||
log.Println("[ERROR] literal not terminated")
|
|
||||||
|
if ch == '\n' || ch < 0 || ch == eof {
|
||||||
|
s.err("literal not terminated")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ch == '"' {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
if ch == '\\' {
|
if ch == '\\' {
|
||||||
// scanEscape
|
s.scanEscape()
|
||||||
return
|
|
||||||
} else {
|
|
||||||
ch = s.next()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// scanEscape scans an escape sequence
|
||||||
|
func (s *Scanner) scanEscape() rune {
|
||||||
|
// http://en.cppreference.com/w/cpp/language/escape
|
||||||
|
ch := s.next() // read character after '\\'
|
||||||
|
switch ch {
|
||||||
|
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
|
||||||
|
// nothing to do
|
||||||
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||||
|
// octal notation
|
||||||
|
ch = s.scanDigits(ch, 8, 3)
|
||||||
|
case 'x':
|
||||||
|
// hexadecimal notation
|
||||||
|
ch = s.scanDigits(s.next(), 16, 2)
|
||||||
|
case 'u':
|
||||||
|
// universal character name
|
||||||
|
ch = s.scanDigits(s.next(), 16, 4)
|
||||||
|
case 'U':
|
||||||
|
// universal character name
|
||||||
|
ch = s.scanDigits(s.next(), 16, 8)
|
||||||
|
default:
|
||||||
|
s.err("illegal char escape")
|
||||||
|
}
|
||||||
|
return ch
|
||||||
|
}
|
||||||
|
|
||||||
|
// scanDigits scans up to n digits of the given base, starting with ch. For
|
||||||
|
// example, the octal escape \123 results in the call scanDigits(ch, 8, 3).
|
||||||
|
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
|
||||||
|
for n > 0 && digitVal(ch) < base {
|
||||||
|
ch = s.next()
|
||||||
|
n--
|
||||||
|
}
|
||||||
|
if n > 0 {
|
||||||
|
s.err("illegal char escape")
|
||||||
|
}
|
||||||
|
|
||||||
|
// we scanned all digits, put the last character read (not part of the digits) back
|
||||||
|
s.unread()
|
||||||
|
return ch
|
||||||
|
}
|
||||||
|
|
||||||
|
// scanIdentifier scans an identifier and returns the literal string
|
||||||
func (s *Scanner) scanIdentifier() string {
|
func (s *Scanner) scanIdentifier() string {
|
||||||
offs := s.currPos.Offset - s.lastCharLen
|
offs := s.currPos.Offset - s.lastCharLen
|
||||||
ch := s.next()
|
ch := s.next()
|
||||||
@ -174,6 +227,16 @@ func (s *Scanner) Pos() Position {
|
|||||||
return s.currPos
|
return s.currPos
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Scanner) err(msg string) {
|
||||||
|
s.ErrorCount++
|
||||||
|
if s.Error != nil {
|
||||||
|
s.Error(s.currPos, msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "%s: %s\n", s.currPos, msg)
|
||||||
|
}
|
||||||
|
|
||||||
func isLetter(ch rune) bool {
|
func isLetter(ch rune) bool {
|
||||||
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
|
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
|
||||||
}
|
}
|
||||||
@ -186,3 +249,15 @@ func isDigit(ch rune) bool {
|
|||||||
func isWhitespace(ch rune) bool {
|
func isWhitespace(ch rune) bool {
|
||||||
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func digitVal(ch rune) int {
|
||||||
|
switch {
|
||||||
|
case '0' <= ch && ch <= '9':
|
||||||
|
return int(ch - '0')
|
||||||
|
case 'a' <= ch && ch <= 'f':
|
||||||
|
return int(ch - 'a' + 10)
|
||||||
|
case 'A' <= ch && ch <= 'F':
|
||||||
|
return int(ch - 'A' + 10)
|
||||||
|
}
|
||||||
|
return 16 // larger than any legal digit val
|
||||||
|
}
|
||||||
|
@ -94,23 +94,23 @@ func TestString(t *testing.T) {
|
|||||||
{token.STRING, `" "`},
|
{token.STRING, `" "`},
|
||||||
{token.STRING, `"a"`},
|
{token.STRING, `"a"`},
|
||||||
{token.STRING, `"本"`},
|
{token.STRING, `"本"`},
|
||||||
// {STRING, `"\a"`},
|
{token.STRING, `"\a"`},
|
||||||
// {STRING, `"\b"`},
|
{token.STRING, `"\b"`},
|
||||||
// {STRING, `"\f"`},
|
{token.STRING, `"\f"`},
|
||||||
// {STRING, `"\n"`},
|
{token.STRING, `"\n"`},
|
||||||
// {STRING, `"\r"`},
|
{token.STRING, `"\r"`},
|
||||||
// {STRING, `"\t"`},
|
{token.STRING, `"\t"`},
|
||||||
// {STRING, `"\v"`},
|
{token.STRING, `"\v"`},
|
||||||
// {STRING, `"\""`},
|
{token.STRING, `"\""`},
|
||||||
// {STRING, `"\000"`},
|
{token.STRING, `"\000"`},
|
||||||
// {STRING, `"\777"`},
|
{token.STRING, `"\777"`},
|
||||||
// {STRING, `"\x00"`},
|
{token.STRING, `"\x00"`},
|
||||||
// {STRING, `"\xff"`},
|
{token.STRING, `"\xff"`},
|
||||||
// {STRING, `"\u0000"`},
|
{token.STRING, `"\u0000"`},
|
||||||
// {STRING, `"\ufA16"`},
|
{token.STRING, `"\ufA16"`},
|
||||||
// {STRING, `"\U00000000"`},
|
{token.STRING, `"\U00000000"`},
|
||||||
// {STRING, `"\U0000ffAB"`},
|
{token.STRING, `"\U0000ffAB"`},
|
||||||
// {STRING, `"` + f100 + `"`},
|
{token.STRING, `"` + f100 + `"`},
|
||||||
}
|
}
|
||||||
|
|
||||||
// create artificial source code
|
// create artificial source code
|
||||||
|
Loading…
Reference in New Issue
Block a user