From 76c0ca70f079aee8fc2a5cc26f0310d1bfa07abb Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Sun, 28 May 2017 07:11:24 -0700 Subject: [PATCH] zclsyntax: scanner to return whole token slice at once On reflection, it seems easier to maintain the necessary state we need by doing all of the scanning in a single pass, since we can then just use local variables within the scanner function. --- zcl/zclsyntax/generate.go | 4 +- .../{scan_token.go => scan_tokens.go} | 55 ++++++++++--------- .../{scan_token.rl => scan_tokens.rl} | 25 +++++---- zcl/zclsyntax/token.go | 9 +-- 4 files changed, 52 insertions(+), 41 deletions(-) rename zcl/zclsyntax/{scan_token.go => scan_tokens.go} (79%) rename zcl/zclsyntax/{scan_token.rl => scan_tokens.rl} (69%) diff --git a/zcl/zclsyntax/generate.go b/zcl/zclsyntax/generate.go index cc36b24..d20f41a 100644 --- a/zcl/zclsyntax/generate.go +++ b/zcl/zclsyntax/generate.go @@ -1,5 +1,5 @@ package zclsyntax //go:generate go run expression_vars_gen.go -//go:generate ragel -Z scan_token.rl -//go:generate gofmt -w scan_token.go +//go:generate ragel -Z scan_tokens.rl +//go:generate gofmt -w scan_tokens.go diff --git a/zcl/zclsyntax/scan_token.go b/zcl/zclsyntax/scan_tokens.go similarity index 79% rename from zcl/zclsyntax/scan_token.go rename to zcl/zclsyntax/scan_tokens.go index 403f8c4..21d34c7 100644 --- a/zcl/zclsyntax/scan_token.go +++ b/zcl/zclsyntax/scan_tokens.go @@ -1,13 +1,13 @@ -// line 1 "scan_token.rl" +// line 1 "scan_tokens.rl" package zclsyntax import ( "github.com/zclconf/go-zcl/zcl" ) -// This file is generated from scan_token.rl. DO NOT EDIT. +// This file is generated from scan_tokens.rl. DO NOT EDIT. -// line 12 "scan_token.go" +// line 12 "scan_tokens.go" var _zcltok_actions []byte = []byte{ 0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, @@ -66,18 +66,18 @@ const zcltok_error int = 0 const zcltok_en_token int = 4 const zcltok_en_main int = 3 -// line 13 "scan_token.rl" +// line 13 "scan_tokens.rl" -func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { +func scanTokens(data []byte, filename string, start zcl.Pos) []Token { offset := 0 - f := tokenFactory{ + f := &tokenAccum{ Filename: filename, Bytes: data, Start: start, } - // line 69 "scan_token.rl" + // line 69 "scan_tokens.rl" // Ragel state cs := 0 // Current State @@ -94,7 +94,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { _ = act _ = eof - // line 104 "scan_token.go" + // line 104 "scan_tokens.go" { cs = zcltok_start ts = 0 @@ -102,7 +102,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { act = 0 } - // line 112 "scan_token.go" + // line 112 "scan_tokens.go" { var _klen int var _trans int @@ -127,7 +127,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { ts = p - // line 136 "scan_token.go" + // line 136 "scan_tokens.go" } } @@ -198,7 +198,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { _acts++ switch _zcltok_actions[_acts-1] { case 0: - // line 25 "scan_token.rl" + // line 25 "scan_tokens.rl" offset = p cs = 4 @@ -210,35 +210,35 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { te = p + 1 case 4: - // line 30 "scan_token.rl" + // line 30 "scan_tokens.rl" te = p + 1 { - return f.makeToken(TokenInvalid, offset, p+1) + f.emitToken(TokenInvalid, offset, p+1) } case 5: - // line 34 "scan_token.rl" + // line 34 "scan_tokens.rl" te = p + 1 { - return f.makeToken(TokenBadUTF8, offset, p+1) + f.emitToken(TokenBadUTF8, offset, p+1) } case 6: - // line 34 "scan_token.rl" + // line 34 "scan_tokens.rl" te = p p-- { - return f.makeToken(TokenBadUTF8, offset, p+1) + f.emitToken(TokenBadUTF8, offset, p+1) } case 7: - // line 34 "scan_token.rl" + // line 34 "scan_tokens.rl" p = (te) - 1 { - return f.makeToken(TokenBadUTF8, offset, p+1) + f.emitToken(TokenBadUTF8, offset, p+1) } - // line 248 "scan_token.go" + // line 248 "scan_tokens.go" } } @@ -254,7 +254,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { ts = 0 - // line 263 "scan_token.go" + // line 263 "scan_tokens.go" } } @@ -280,9 +280,14 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { } } - // line 89 "scan_token.rl" + // line 89 "scan_tokens.rl" - // If we fall out here then we'll just classify the remainder of the - // file as invalid. - return f.makeToken(TokenInvalid, 0, len(data)) + // If we fall out here without being in a final state then we've + // encountered something that the scanner can't match, which we'll + // deal with as an invalid. + if cs < zcltok_first_final { + f.emitToken(TokenInvalid, p, len(data)) + } + + return f.Tokens } diff --git a/zcl/zclsyntax/scan_token.rl b/zcl/zclsyntax/scan_tokens.rl similarity index 69% rename from zcl/zclsyntax/scan_token.rl rename to zcl/zclsyntax/scan_tokens.rl index db587cf..656c8de 100644 --- a/zcl/zclsyntax/scan_token.rl +++ b/zcl/zclsyntax/scan_tokens.rl @@ -4,18 +4,18 @@ import ( "github.com/zclconf/go-zcl/zcl" ) -// This file is generated from scan_token.rl. DO NOT EDIT. +// This file is generated from scan_tokens.rl. DO NOT EDIT. %%{ - # (except you are actually in scan_token.rl here, so edit away!) + # (except you are actually in scan_tokens.rl here, so edit away!) machine zcltok; write data; }%% -func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { +func scanTokens(data []byte, filename string, start zcl.Pos) []Token { offset := 0 - f := tokenFactory{ + f := &tokenAccum{ Filename: filename, Bytes: data, Start: start, @@ -28,15 +28,15 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { } action EmitInvalid { - return f.makeToken(TokenInvalid, offset, p+1) + f.emitToken(TokenInvalid, offset, p+1) } action EmitBadUTF8 { - return f.makeToken(TokenBadUTF8, offset, p+1) + f.emitToken(TokenBadUTF8, offset, p+1) } action EmitEOF { - return f.makeToken(TokenEOF, offset, offset) + f.emitToken(TokenEOF, offset, offset) } UTF8Cont = 0x80 .. 0xBF; @@ -88,7 +88,12 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) { write exec; }%% - // If we fall out here then we'll just classify the remainder of the - // file as invalid. - return f.makeToken(TokenInvalid, 0, len(data)) + // If we fall out here without being in a final state then we've + // encountered something that the scanner can't match, which we'll + // deal with as an invalid. + if cs < zcltok_first_final { + f.emitToken(TokenInvalid, p, len(data)) + } + + return f.Tokens } diff --git a/zcl/zclsyntax/token.go b/zcl/zclsyntax/token.go index 5c0e9db..318f738 100644 --- a/zcl/zclsyntax/token.go +++ b/zcl/zclsyntax/token.go @@ -79,13 +79,14 @@ const ( TokenBadUTF8 TokenType = '💩' ) -type tokenFactory struct { +type tokenAccum struct { Filename string Bytes []byte Start zcl.Pos + Tokens []Token } -func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, []byte) { +func (f *tokenAccum) emitToken(ty TokenType, startOfs int, endOfs int) { // Walk through our buffer to figure out how much we need to adjust // the start pos to get our end pos. @@ -107,7 +108,7 @@ func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, b = b[advance:] } - return Token{ + f.Tokens = append(f.Tokens, Token{ Type: ty, Bytes: f.Bytes[startOfs:endOfs], Range: zcl.Range{ @@ -115,5 +116,5 @@ func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, Start: start, End: end, }, - }, f.Bytes[endOfs:] + }) }