scanner: implement string scanning
This commit is contained in:
parent
94bd4afe4d
commit
1f011b4e82
@ -2,9 +2,10 @@ package scanner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"unicode"
|
||||
|
||||
"github.com/fatih/hcl/token"
|
||||
@ -26,6 +27,13 @@ type Scanner struct {
|
||||
tokBuf bytes.Buffer // token text buffer
|
||||
tokPos int // token text tail position (srcBuf index); valid if >= 0
|
||||
tokEnd int // token text tail end (srcBuf index)
|
||||
|
||||
// Error is called for each error encountered. If no Error
|
||||
// function is set, the error is reported to os.Stderr.
|
||||
Error func(pos Position, msg string)
|
||||
|
||||
// ErrorCount is incremented by one for each error encountered.
|
||||
ErrorCount int
|
||||
}
|
||||
|
||||
// NewScanner returns a new instance of Scanner. Even though src is an io.Reader,
|
||||
@ -122,25 +130,70 @@ func (s *Scanner) Scan() (tok token.Token) {
|
||||
}
|
||||
|
||||
func (s *Scanner) scanString() {
|
||||
for {
|
||||
// '"' opening already consumed
|
||||
ch := s.next() // read character after quote
|
||||
for ch != '"' {
|
||||
if ch == '\n' || ch < 0 {
|
||||
log.Println("[ERROR] literal not terminated")
|
||||
// read character after quote
|
||||
ch := s.next()
|
||||
|
||||
if ch == '\n' || ch < 0 || ch == eof {
|
||||
s.err("literal not terminated")
|
||||
return
|
||||
}
|
||||
|
||||
if ch == '"' {
|
||||
break
|
||||
}
|
||||
|
||||
if ch == '\\' {
|
||||
// scanEscape
|
||||
return
|
||||
} else {
|
||||
ch = s.next()
|
||||
s.scanEscape()
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// scanEscape scans an escape sequence
|
||||
func (s *Scanner) scanEscape() rune {
|
||||
// http://en.cppreference.com/w/cpp/language/escape
|
||||
ch := s.next() // read character after '/'
|
||||
switch ch {
|
||||
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
|
||||
// nothing to do
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
// octal notation
|
||||
ch = s.scanDigits(ch, 8, 3)
|
||||
case 'x':
|
||||
// hexademical notation
|
||||
ch = s.scanDigits(s.next(), 16, 2)
|
||||
case 'u':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 4)
|
||||
case 'U':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 8)
|
||||
default:
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanDigits scans a rune with the given base for n times. For example an
|
||||
// octan notation \184 would yield in scanDigits(ch, 8, 3)
|
||||
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
|
||||
for n > 0 && digitVal(ch) < base {
|
||||
ch = s.next()
|
||||
n--
|
||||
}
|
||||
if n > 0 {
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
|
||||
// we scanned all digits, put the last non digit char back
|
||||
s.unread()
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanIdentifier scans an identifier and returns the literal string
|
||||
func (s *Scanner) scanIdentifier() string {
|
||||
offs := s.currPos.Offset - s.lastCharLen
|
||||
ch := s.next()
|
||||
@ -174,6 +227,16 @@ func (s *Scanner) Pos() Position {
|
||||
return s.currPos
|
||||
}
|
||||
|
||||
func (s *Scanner) err(msg string) {
|
||||
s.ErrorCount++
|
||||
if s.Error != nil {
|
||||
s.Error(s.currPos, msg)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "%s: %s\n", s.currPos, msg)
|
||||
}
|
||||
|
||||
// isLetter reports whether ch may appear in an identifier as a letter: an
// ASCII letter, an underscore, or a non-ASCII rune (>= 0x80) that
// unicode.IsLetter accepts.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z', ch == '_':
		return true
	case ch >= 0x80:
		// only consult the Unicode tables outside the ASCII range
		return unicode.IsLetter(ch)
	}
	return false
}
|
||||
@ -186,3 +249,15 @@ func isDigit(ch rune) bool {
|
||||
// isWhitespace reports whether ch is a space, horizontal tab, newline or
// carriage return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
|
||||
|
||||
// digitVal returns the numeric value of ch interpreted as a digit: 0-9 for
// '0'-'9' and 10-15 for 'a'-'f' or 'A'-'F'. Any other rune yields 16, which is
// larger than every legal digit value for bases up to 16.
func digitVal(ch rune) int {
	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if ch >= 'a' && ch <= 'f' {
		return int(ch-'a') + 10
	}
	if ch >= 'A' && ch <= 'F' {
		return int(ch-'A') + 10
	}
	return 16 // larger than any legal digit val
}
|
||||
|
@ -94,23 +94,23 @@ func TestString(t *testing.T) {
|
||||
{token.STRING, `" "`},
|
||||
{token.STRING, `"a"`},
|
||||
{token.STRING, `"本"`},
|
||||
// {STRING, `"\a"`},
|
||||
// {STRING, `"\b"`},
|
||||
// {STRING, `"\f"`},
|
||||
// {STRING, `"\n"`},
|
||||
// {STRING, `"\r"`},
|
||||
// {STRING, `"\t"`},
|
||||
// {STRING, `"\v"`},
|
||||
// {STRING, `"\""`},
|
||||
// {STRING, `"\000"`},
|
||||
// {STRING, `"\777"`},
|
||||
// {STRING, `"\x00"`},
|
||||
// {STRING, `"\xff"`},
|
||||
// {STRING, `"\u0000"`},
|
||||
// {STRING, `"\ufA16"`},
|
||||
// {STRING, `"\U00000000"`},
|
||||
// {STRING, `"\U0000ffAB"`},
|
||||
// {STRING, `"` + f100 + `"`},
|
||||
{token.STRING, `"\a"`},
|
||||
{token.STRING, `"\b"`},
|
||||
{token.STRING, `"\f"`},
|
||||
{token.STRING, `"\n"`},
|
||||
{token.STRING, `"\r"`},
|
||||
{token.STRING, `"\t"`},
|
||||
{token.STRING, `"\v"`},
|
||||
{token.STRING, `"\""`},
|
||||
{token.STRING, `"\000"`},
|
||||
{token.STRING, `"\777"`},
|
||||
{token.STRING, `"\x00"`},
|
||||
{token.STRING, `"\xff"`},
|
||||
{token.STRING, `"\u0000"`},
|
||||
{token.STRING, `"\ufA16"`},
|
||||
{token.STRING, `"\U00000000"`},
|
||||
{token.STRING, `"\U0000ffAB"`},
|
||||
{token.STRING, `"` + f100 + `"`},
|
||||
}
|
||||
|
||||
// create artificial source code
|
||||
|
Loading…
Reference in New Issue
Block a user