zclsyntax: scanner to return whole token slice at once
On reflection, it seems easier to maintain the necessary state we need by doing all of the scanning in a single pass, since we can then just use local variables within the scanner function.
commit 76c0ca70f0
parent d57901de5f
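The gist of the change, before the diff: instead of a nextToken function that hands back one token plus the remaining bytes, the scanner now drives the whole input in a single pass and appends every token to an accumulator, so the scanning state can live in local variables. A rough, self-contained sketch of that accumulator pattern follows; the names mirror those in the diff, but the trivial whitespace splitter stands in for the real Ragel-generated state machine and the simplified Token type omits the type/range details of the zclsyntax one.

package main

import "fmt"

// Token is a simplified stand-in for zclsyntax's Token, which also
// carries a TokenType and a zcl.Range.
type Token struct {
	Type  string
	Bytes []byte
}

// tokenAccum collects tokens as they are emitted, rather than having the
// scanner hand them back one at a time.
type tokenAccum struct {
	Bytes  []byte
	Tokens []Token
}

func (f *tokenAccum) emitToken(ty string, startOfs, endOfs int) {
	f.Tokens = append(f.Tokens, Token{Type: ty, Bytes: f.Bytes[startOfs:endOfs]})
}

// scanTokens walks the whole input in a single pass and returns every
// token at once; the loop index and start offset are plain local variables.
func scanTokens(data []byte) []Token {
	f := &tokenAccum{Bytes: data}
	start := 0
	for i, b := range data {
		if b == ' ' { // trivial delimiter rule, in place of the Ragel machine
			if i > start {
				f.emitToken("WORD", start, i)
			}
			start = i + 1
		}
	}
	if len(data) > start {
		f.emitToken("WORD", start, len(data))
	}
	return f.Tokens
}

func main() {
	for _, tok := range scanTokens([]byte("hello zcl scanner")) {
		fmt.Printf("%s %q\n", tok.Type, tok.Bytes)
	}
}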
@@ -1,5 +1,5 @@
 package zclsyntax
 
 //go:generate go run expression_vars_gen.go
-//go:generate ragel -Z scan_token.rl
-//go:generate gofmt -w scan_token.go
+//go:generate ragel -Z scan_tokens.rl
+//go:generate gofmt -w scan_tokens.go
@@ -1,13 +1,13 @@
-// line 1 "scan_token.rl"
+// line 1 "scan_tokens.rl"
 package zclsyntax
 
 import (
 	"github.com/zclconf/go-zcl/zcl"
 )
 
-// This file is generated from scan_token.rl. DO NOT EDIT.
+// This file is generated from scan_tokens.rl. DO NOT EDIT.
 
-// line 12 "scan_token.go"
+// line 12 "scan_tokens.go"
 var _zcltok_actions []byte = []byte{
 	0, 1, 0, 1, 1, 1, 2, 1, 3,
 	1, 4, 1, 5, 1, 6, 1, 7,
@@ -66,18 +66,18 @@ const zcltok_error int = 0
 const zcltok_en_token int = 4
 const zcltok_en_main int = 3
 
-// line 13 "scan_token.rl"
+// line 13 "scan_tokens.rl"
 
-func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
+func scanTokens(data []byte, filename string, start zcl.Pos) []Token {
 	offset := 0
 
-	f := tokenFactory{
+	f := &tokenAccum{
 		Filename: filename,
 		Bytes: data,
 		Start: start,
 	}
 
-// line 69 "scan_token.rl"
+// line 69 "scan_tokens.rl"
 
 	// Ragel state
 	cs := 0 // Current State
@@ -94,7 +94,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 	_ = act
 	_ = eof
 
-// line 104 "scan_token.go"
+// line 104 "scan_tokens.go"
 	{
 		cs = zcltok_start
 		ts = 0
@@ -102,7 +102,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 		act = 0
 	}
 
-// line 112 "scan_token.go"
+// line 112 "scan_tokens.go"
 	{
 		var _klen int
 		var _trans int
@@ -127,7 +127,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 
 				ts = p
 
-// line 136 "scan_token.go"
+// line 136 "scan_tokens.go"
 			}
 		}
 
@@ -198,7 +198,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 			_acts++
 			switch _zcltok_actions[_acts-1] {
 			case 0:
-// line 25 "scan_token.rl"
+// line 25 "scan_tokens.rl"
 
 				offset = p
 				cs = 4
@@ -210,35 +210,35 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 				te = p + 1
 
 			case 4:
-// line 30 "scan_token.rl"
+// line 30 "scan_tokens.rl"
 
 				te = p + 1
 				{
-					return f.makeToken(TokenInvalid, offset, p+1)
+					f.emitToken(TokenInvalid, offset, p+1)
 				}
 			case 5:
-// line 34 "scan_token.rl"
+// line 34 "scan_tokens.rl"
 
 				te = p + 1
 				{
-					return f.makeToken(TokenBadUTF8, offset, p+1)
+					f.emitToken(TokenBadUTF8, offset, p+1)
 				}
 			case 6:
-// line 34 "scan_token.rl"
+// line 34 "scan_tokens.rl"
 
 				te = p
 				p--
 				{
-					return f.makeToken(TokenBadUTF8, offset, p+1)
+					f.emitToken(TokenBadUTF8, offset, p+1)
 				}
 			case 7:
-// line 34 "scan_token.rl"
+// line 34 "scan_tokens.rl"
 
 				p = (te) - 1
 				{
-					return f.makeToken(TokenBadUTF8, offset, p+1)
+					f.emitToken(TokenBadUTF8, offset, p+1)
 				}
-// line 248 "scan_token.go"
+// line 248 "scan_tokens.go"
 			}
 		}
 
@@ -254,7 +254,7 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 
 				ts = 0
 
-// line 263 "scan_token.go"
+// line 263 "scan_tokens.go"
 			}
 		}
 
@@ -280,9 +280,14 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 		}
 	}
 
-// line 89 "scan_token.rl"
+// line 89 "scan_tokens.rl"
 
-	// If we fall out here then we'll just classify the remainder of the
-	// file as invalid.
-	return f.makeToken(TokenInvalid, 0, len(data))
+	// If we fall out here without being in a final state then we've
+	// encountered something that the scanner can't match, which we'll
+	// deal with as an invalid.
+	if cs < zcltok_first_final {
+		f.emitToken(TokenInvalid, p, len(data))
+	}
+
+	return f.Tokens
 }
@@ -4,18 +4,18 @@ import (
 	"github.com/zclconf/go-zcl/zcl"
 )
 
-// This file is generated from scan_token.rl. DO NOT EDIT.
+// This file is generated from scan_tokens.rl. DO NOT EDIT.
 %%{
-  # (except you are actually in scan_token.rl here, so edit away!)
+  # (except you are actually in scan_tokens.rl here, so edit away!)
 
   machine zcltok;
   write data;
 }%%
 
-func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
+func scanTokens(data []byte, filename string, start zcl.Pos) []Token {
 	offset := 0
 
-	f := tokenFactory{
+	f := &tokenAccum{
 		Filename: filename,
 		Bytes: data,
 		Start: start,
@@ -28,15 +28,15 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 	}
 
 	action EmitInvalid {
-		return f.makeToken(TokenInvalid, offset, p+1)
+		f.emitToken(TokenInvalid, offset, p+1)
 	}
 
 	action EmitBadUTF8 {
-		return f.makeToken(TokenBadUTF8, offset, p+1)
+		f.emitToken(TokenBadUTF8, offset, p+1)
 	}
 
 	action EmitEOF {
-		return f.makeToken(TokenEOF, offset, offset)
+		f.emitToken(TokenEOF, offset, offset)
 	}
 
 	UTF8Cont = 0x80 .. 0xBF;
@@ -88,7 +88,12 @@ func nextToken(data []byte, filename string, start zcl.Pos) (Token, []byte) {
 		write exec;
 	}%%
 
-	// If we fall out here then we'll just classify the remainder of the
-	// file as invalid.
-	return f.makeToken(TokenInvalid, 0, len(data))
+	// If we fall out here without being in a final state then we've
+	// encountered something that the scanner can't match, which we'll
+	// deal with as an invalid.
+	if cs < zcltok_first_final {
+		f.emitToken(TokenInvalid, p, len(data))
+	}
+
+	return f.Tokens
 }
@@ -79,13 +79,14 @@ const (
 	TokenBadUTF8 TokenType = '💩'
 )
 
-type tokenFactory struct {
+type tokenAccum struct {
 	Filename string
 	Bytes []byte
 	Start zcl.Pos
+	Tokens []Token
 }
 
-func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token, []byte) {
+func (f *tokenAccum) emitToken(ty TokenType, startOfs int, endOfs int) {
 	// Walk through our buffer to figure out how much we need to adjust
 	// the start pos to get our end pos.
 
@@ -107,7 +108,7 @@ func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token,
 		b = b[advance:]
 	}
 
-	return Token{
+	f.Tokens = append(f.Tokens, Token{
 		Type: ty,
 		Bytes: f.Bytes[startOfs:endOfs],
 		Range: zcl.Range{
@@ -115,5 +116,5 @@ func (f tokenFactory) makeToken(ty TokenType, startOfs int, endOfs int) (Token,
 			Start: start,
 			End: end,
 		},
-	}, f.Bytes[endOfs:]
+	})
 }
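The emitToken hunks above keep the existing position bookkeeping ("walk through our buffer to figure out how much we need to adjust the start pos to get our end pos") and only change what happens at the end: the token is appended to f.Tokens instead of being returned. For readers unfamiliar with that bookkeeping, here is a rough, runnable sketch of how such a walk can turn byte offsets into line/column positions; Pos and advancePos are simplified illustrations, not the actual zcl.Pos handling in token.go, which may segment the input differently.

package main

import (
	"fmt"
	"unicode/utf8"
)

// Pos mirrors the general shape of a source position: 1-based line and
// column plus a byte offset.
type Pos struct {
	Line, Column, Byte int
}

// advancePos walks b rune by rune from pos, bumping the line on newlines
// and the column otherwise, and always advancing the byte offset.
func advancePos(pos Pos, b []byte) Pos {
	for len(b) > 0 {
		r, advance := utf8.DecodeRune(b)
		if r == '\n' {
			pos.Line++
			pos.Column = 1
		} else {
			pos.Column++
		}
		pos.Byte += advance
		b = b[advance:]
	}
	return pos
}

func main() {
	start := Pos{Line: 1, Column: 1, Byte: 0}
	// End position of a token whose bytes are "foo\nbar", starting at the
	// beginning of the file.
	fmt.Printf("%+v\n", advancePos(start, []byte("foo\nbar")))
}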