zclsyntax: scanner to return whole token slice at once

On reflection, it seems easier to maintain the necessary state
by doing all of the scanning in a single pass, since we can then just
use local variables within the scanner function.
This commit is contained in:
Martin Atkins 2017-05-28 07:11:24 -07:00
parent d57901de5f
commit 76c0ca70f0
4 changed files with 52 additions and 41 deletions

View File

@ -1,5 +1,5 @@
package zclsyntax
//go:generate go run expression_vars_gen.go
//go:generate ragel -Z scan_token.rl
//go:generate gofmt -w scan_token.go
//go:generate ragel -Z scan_tokens.rl
//go:generate gofmt -w scan_tokens.go

View File

@ -1,13 +1,13 @@
// line 1 "scan_token.rl"
// line 1 "scan_tokens.rl"
package zclsyntax
import (
"github.com/zclconf/go-zcl/zcl"
)
// This file is generated from scan_token.rl. DO NOT EDIT.
// This file is generated from scan_tokens.rl. DO NOT EDIT.
// line 12 "scan_token.go"
// line 12 "scan_tokens.go"
var _zcltok_actions []byte = []byte{
0, 1, 0, 1, 1, 1, 2, 1, 3,
1, 4, 1, 5, 1, 6, 1, 7,
@ -66,18 +66,18 @@ const zcltok_error int = 0
const zcltok_en_token int = 4
const zcltok_en_main int = 3
// line 13 "scan_token.rl"
// line 13 "scan_tokens.rl"
func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
func scanTokens(data []byte, filename string, start zcl.Pos) []Token {
offset := 0
f := tokenFactory{
f := &tokenAccum{
Filename: filename,
Bytes: data,
Start: start,
}
// line 69 "scan_token.rl"
// line 69 "scan_tokens.rl"
// Ragel state
cs := 0 // Current State
@ -94,7 +94,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
_ = act
_ = eof
// line 104 "scan_token.go"
// line 104 "scan_tokens.go"
{
cs = zcltok_start
ts = 0
@ -102,7 +102,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
act = 0
}
// line 112 "scan_token.go"
// line 112 "scan_tokens.go"
{
var _klen int
var _trans int
@ -127,7 +127,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
ts = p
// line 136 "scan_token.go"
// line 136 "scan_tokens.go"
}
}
@ -198,7 +198,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
_acts++
switch _zcltok_actions[_acts-1] {
case 0:
// line 25 "scan_token.rl"
// line 25 "scan_tokens.rl"
offset = p
cs = 4
@ -210,35 +210,35 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
te = p + 1
case 4:
// line 30 "scan_token.rl"
// line 30 "scan_tokens.rl"
te = p + 1
{
return f.makeToken(TokenInvalid, offset, p+1)
f.emitToken(TokenInvalid, offset, p+1)
}
case 5:
// line 34 "scan_token.rl"
// line 34 "scan_tokens.rl"
te = p + 1
{
return f.makeToken(TokenBadUTF8, offset, p+1)
f.emitToken(TokenBadUTF8, offset, p+1)
}
case 6:
// line 34 "scan_token.rl"
// line 34 "scan_tokens.rl"
te = p
p--
{
return f.makeToken(TokenBadUTF8, offset, p+1)
f.emitToken(TokenBadUTF8, offset, p+1)
}
case 7:
// line 34 "scan_token.rl"
// line 34 "scan_tokens.rl"
p = (te) - 1
{
return f.makeToken(TokenBadUTF8, offset, p+1)
f.emitToken(TokenBadUTF8, offset, p+1)
}
// line 248 "scan_token.go"
// line 248 "scan_tokens.go"
}
}
@ -254,7 +254,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
ts = 0
// line 263 "scan_token.go"
// line 263 "scan_tokens.go"
}
}
@ -280,9 +280,14 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
}
}
// line 89 "scan_token.rl"
// line 89 "scan_tokens.rl"
// If we fall out here then we'll just classify the remainder of the
// file as invalid.
return f.makeToken(TokenInvalid, 0, len(data))
// If we fall out here without being in a final state then we've
// encountered something that the scanner can't match, which we'll
// deal with as an invalid.
if cs < zcltok_first_final {
f.emitToken(TokenInvalid, p, len(data))
}
return f.Tokens
}

View File

@ -4,18 +4,18 @@ import (
"github.com/zclconf/go-zcl/zcl"
)
// This file is generated from scan_token.rl. DO NOT EDIT.
// This file is generated from scan_tokens.rl. DO NOT EDIT.
%%{
# (except you are actually in scan_token.rl here, so edit away!)
# (except you are actually in scan_tokens.rl here, so edit away!)
machine zcltok;
write data;
}%%
func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
func scanTokens(data []byte, filename string, start zcl.Pos) []Token {
offset := 0
f := tokenFactory{
f := &tokenAccum{
Filename: filename,
Bytes: data,
Start: start,
@ -28,15 +28,15 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
}
action EmitInvalid {
return f.makeToken(TokenInvalid, offset, p+1)
f.emitToken(TokenInvalid, offset, p+1)
}
action EmitBadUTF8 {
return f.makeToken(TokenBadUTF8, offset, p+1)
f.emitToken(TokenBadUTF8, offset, p+1)
}
action EmitEOF {
return f.makeToken(TokenEOF, offset, offset)
f.emitToken(TokenEOF, offset, offset)
}
UTF8Cont = 0x80 .. 0xBF;
@ -88,7 +88,12 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
write exec;
}%%
// If we fall out here then we'll just classify the remainder of the
// file as invalid.
return f.makeToken(TokenInvalid, 0, len(data))
// If we fall out here without being in a final state then we've
// encountered something that the scanner can't match, which we'll
// deal with as an invalid.
if cs < zcltok_first_final {
f.emitToken(TokenInvalid, p, len(data))
}
return f.Tokens
}

View File

@ -79,13 +79,14 @@ const (
TokenBadUTF8 TokenType = '💩'
)
type tokenFactory struct {
type tokenAccum struct {
Filename string
Bytes []byte
Start zcl.Pos
Tokens []Token
}
func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, []byte) {
func (f *tokenAccum) emitToken(ty TokenType, startOfs int, endOfs int) {
// Walk through our buffer to figure out how much we need to adjust
// the start pos to get our end pos.
@ -107,7 +108,7 @@ func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token,
b = b[advance:]
}
return Token{
f.Tokens = append(f.Tokens, Token{
Type: ty,
Bytes: f.Bytes[startOfs:endOfs],
Range: zcl.Range{
@ -115,5 +116,5 @@ func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token,
Start: start,
End: end,
},
}, f.Bytes[endOfs:]
})
}