hcl/zcl/zclsyntax/scan_tokens.rl

package zclsyntax

import (
    "github.com/zclconf/go-zcl/zcl"
)

// This file is generated from scan_tokens.rl. DO NOT EDIT.
%%{
  # (except you are actually in scan_tokens.rl here, so edit away!)

  # Name the state machine. Identifiers that Ragel generates for it carry
  # this name as a prefix (for example zcltok_first_final, which scanTokens
  # checks after the scan).
  machine zcltok;

  # Emit the machine's static data at this point in the generated file.
  write data;
}%%

// scanTokens runs the Ragel-generated zcltok scanner over the whole of data
// in a single pass and returns the accumulated tokens.
//
// filename and start are stored on the tokenAccum that collects the tokens,
// alongside data itself. The final token emitted is always a synthetic
// TokenEOF spanning len(data)..len(data), so the returned slice is never
// empty as long as emitToken appends to f.Tokens (emitToken is defined
// elsewhere; confirm there).
func scanTokens(data []byte, filename string, start zcl.Pos) []Token {
    f := &tokenAccum{
        Filename: filename,
        Bytes:    data,
        Pos:      start,
    }

    %%{
        # Provides the ID_Start and ID_Continue definitions used by Ident.
        include UnicodeDerived "unicode_derived.rl";

        # Anything shaped like a UTF-8 encoded character: a lead byte followed
        # by the matching number of continuation bytes. This checks shape
        # only; lead bytes such as 0xC0 or 0xF5..0xF7 are accepted here.
        UTF8Cont = 0x80 .. 0xBF;
        AnyUTF8 = (
            0x00..0x7F |
            0xC0..0xDF . UTF8Cont |
            0xE0..0xEF . UTF8Cont . UTF8Cont |
            0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
        );

        # A single byte that is not, on its own, an AnyUTF8 match (that is,
        # any one non-ASCII byte).
        BrokenUTF8 = any - AnyUTF8;

        # Permissive on purpose at this level: this also matches strings such
        # as "1.2.3". NOTE(review): presumably validated by a later stage.
        NumberLit = digit (digit|'.'|('e'|'E') ('+'|'-')? digit)*;
        Ident = ID_Start ID_Continue*;

        # Symbols that just represent themselves are handled as a single rule.
        SelfToken = "{" | "}" | "[" | "]" | "(" | ")" | "." | "*" | "/" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`";

        # Tabs are not valid, but we accept them in the scanner and mark them
        # as tokens so that we can produce diagnostics advising the user to
        # use spaces instead.
        Tabs = 0x09+;

        # Runs of spaces are matched but produce no token.
        Spaces = ' '+;

        # Scanner rules. The longest match wins and ties go to the earlier
        # rule, so AnyUTF8 and BrokenUTF8 at the bottom act as catch-alls for
        # input that none of the real token rules accept.
        main := |*
            Spaces           => {};
            NumberLit        => { token(TokenNumberLit) };
            Ident            => { token(TokenIdent) };
            SelfToken        => { selfToken() };
            Tabs             => { token(TokenTabs) };
            AnyUTF8          => { token(TokenInvalid) };
            BrokenUTF8       => { token(TokenBadUTF8) };
        *|;

    }%%

    // Ragel state. These names are the ones the generated code expects.
    cs := 0         // Current State
    p := 0          // "Pointer" into data
    pe := len(data) // End-of-data "pointer"
    ts := 0         // Start offset of the token currently being matched
    te := 0         // End offset of the token currently being matched
    act := 0        // Scanner bookkeeping for the last matched rule
    eof := pe       // The whole input is available, so EOF is at pe

    // Make Go compiler happy
    _ = ts
    _ = te
    _ = act
    _ = eof

    // token emits the bytes of the current match (ts..te) with the given
    // type. It is called from the scanner actions above.
    token := func (ty TokenType) {
        f.emitToken(ty, ts, te)
    }

    // selfToken emits a single-byte token whose type is the byte's own
    // value. It panics if the current match is not exactly one byte long.
    selfToken := func () {
        b := data[ts:te]
        if len(b) != 1 {
            // should never happen
            panic("selfToken only works for single-character tokens")
        }
        f.emitToken(TokenType(b[0]), ts, te)
    }

    %%{
        write init;
        write exec;
    }%%

    // If we fall out here without being in a final state then we've
    // encountered something that the scanner can't match, which we'll
    // report as a single invalid token covering the rest of the input.
    //
    // The token starts at ts, the beginning of the match that was in
    // progress, rather than at p, the byte where matching stopped. Starting
    // at p would leave any bytes between ts and p covered by no token.
    if cs < zcltok_first_final {
        f.emitToken(TokenInvalid, ts, len(data))
    }

    // We always emit a synthetic EOF token at the end, since it gives the
    // parser position information for an "unexpected EOF" diagnostic.
    f.emitToken(TokenEOF, len(data), len(data))

    return f.Tokens
}
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00			`package zclsyntax`

			`import (`
			`"github.com/zclconf/go-zcl/zcl"`
			`)`

zclsyntax: scanner to return whole token slice at once On reflection, it seems easier to maintain the necessary state we need by doing all of the scanning in a single pass, since we can then just use local variables within the scanner function. 2017-05-28 14:11:24 +00:00			`// This file is generated from scan_tokens.rl. DO NOT EDIT.`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00			`%%{`
zclsyntax: scanner to return whole token slice at once On reflection, it seems easier to maintain the necessary state we need by doing all of the scanning in a single pass, since we can then just use local variables within the scanner function. 2017-05-28 14:11:24 +00:00			`# (except you are actually in scan_tokens.rl here, so edit away!)`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00
			`machine zcltok;`
			`write data;`
			`}%%`

zclsyntax: scanner to return whole token slice at once On reflection, it seems easier to maintain the necessary state we need by doing all of the scanning in a single pass, since we can then just use local variables within the scanner function. 2017-05-28 14:11:24 +00:00			`func scanTokens(data []byte, filename string, start zcl.Pos) []Token {`
			`f := &tokenAccum{`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00			`Filename: filename,`
			`Bytes: data,`
zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00			`Pos: start,`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00			`}`

			`%%{`
zclsyntax: identifiers in the scanner 2017-05-28 16:16:53 +00:00			`include UnicodeDerived "unicode_derived.rl";`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00
			`UTF8Cont = 0x80 .. 0xBF;`
			`AnyUTF8 = (`
			`0x00..0x7F \|`
			`0xC0..0xDF . UTF8Cont \|`
			`0xE0..0xEF . UTF8Cont . UTF8Cont \|`
			`0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont`
			`);`
			`BrokenUTF8 = any - AnyUTF8;`

zclsyntax: scanning of numeric literals 2017-05-28 15:56:43 +00:00			`NumberLit = digit (digit\|'.'\|('e'\|'E') ('+'\|'-')? digit)*;`
zclsyntax: identifiers in the scanner 2017-05-28 16:16:53 +00:00			`Ident = ID_Start ID_Continue*;`
zclsyntax: scanning of numeric literals 2017-05-28 15:56:43 +00:00
zclsyntax: scan single-character tokens that represent themselves For convenience we use the rune values of these tokens as their token enum values, so we can handle them all via a single rule. 2017-05-28 16:34:20 +00:00			`# Symbols that just represent themselves are handled as a single rule.`
			SelfToken = "{" \| "}" \| "[" \| "]" \| "(" \| ")" \| "." \| "*" \| "/" \| "+" \| "-" \| "=" \| "<" \| ">" \| "!" \| "?" \| ":" \| "\n" \| "&" \| "\|" \| "~" \| "^" \| ";" \| "`";

zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00			`# Tabs are not valid, but we accept them in the scanner and mark them`
			`# as tokens so that we can produce diagnostics advising the user to`
			`# use spaces instead.`
zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00			`Tabs = 0x09+;`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00
zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00			`Spaces = ' '+;`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00
zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00			`main := \|*`
zclsyntax: scanning of numeric literals 2017-05-28 15:56:43 +00:00			`Spaces => {};`
			`NumberLit => { token(TokenNumberLit) };`
zclsyntax: identifiers in the scanner 2017-05-28 16:16:53 +00:00			`Ident => { token(TokenIdent) };`
zclsyntax: scan single-character tokens that represent themselves For convenience we use the rune values of these tokens as their token enum values, so we can handle them all via a single rule. 2017-05-28 16:34:20 +00:00			`SelfToken => { selfToken() };`
zclsyntax: scanning of numeric literals 2017-05-28 15:56:43 +00:00			`Tabs => { token(TokenTabs) };`
			`AnyUTF8 => { token(TokenInvalid) };`
			`BrokenUTF8 => { token(TokenBadUTF8) };`
zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00			`*\|;`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00
			`}%%`

			`// Ragel state`
			`cs := 0 // Current State`
			`p := 0 // "Pointer" into data`
			`pe := len(data) // End-of-data "pointer"`
			`ts := 0`
			`te := 0`
			`act := 0`
			`eof := pe`

			`// Make Go compiler happy`
			`_ = ts`
			`_ = te`
			`_ = act`
			`_ = eof`

zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00			`token := func (ty TokenType) {`
			`f.emitToken(ty, ts, te)`
			`}`
zclsyntax: scan single-character tokens that represent themselves For convenience we use the rune values of these tokens as their token enum values, so we can handle them all via a single rule. 2017-05-28 16:34:20 +00:00			`selfToken := func () {`
			`b := data[ts:te]`
			`if len(b) != 1 {`
			`// should never happen`
			`panic("selfToken only works for single-character tokens")`
			`}`
			`f.emitToken(TokenType(b[0]), ts, te)`
			`}`
zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00			`%%{`
			`write init;`
			`write exec;`
			`}%%`

zclsyntax: scanner to return whole token slice at once On reflection, it seems easier to maintain the necessary state we need by doing all of the scanning in a single pass, since we can then just use local variables within the scanner function. 2017-05-28 14:11:24 +00:00			`// If we fall out here without being in a final state then we've`
			`// encountered something that the scanner can't match, which we'll`
			`// deal with as an invalid.`
			`if cs < zcltok_first_final {`
			`f.emitToken(TokenInvalid, p, len(data))`
			`}`

zclsyntax: re-organize and simplify the scanner 2017-05-28 15:38:13 +00:00			`// We always emit a synthetic EOF token at the end, since it gives the`
			`// parser position information for an "unexpected EOF" diagnostic.`
			`f.emitToken(TokenEOF, len(data), len(data))`

zclsyntax: scanner to return whole token slice at once On reflection, it seems easier to maintain the necessary state we need by doing all of the scanning in a single pass, since we can then just use local variables within the scanner function. 2017-05-28 14:11:24 +00:00			`return f.Tokens`
zclsyntax: start of a ragel-based scanner Using Ragel here because the scanner is going to be somewhat complex due to the need to switch back and forth between normal and template states, etc. This should be easier to maintain than a hand-written scanner, while ragel gives us the extra features we need to implement things that would normally be too complex for a "regular" scanner generator. 2017-05-28 02:01:43 +00:00			`}`