From 954939b49f873d69d7fbe42e9b2638dfbd4653dc Mon Sep 17 00:00:00 2001
From: Martin Atkins
Date: Mon, 29 May 2017 19:28:10 -0700
Subject: [PATCH] zclsyntax: initial pass at body parsing

Only able to parse empty bodies so far.
---
 zcl/zclsyntax/parser.go      | 182 +++++++++++++++++++++++++++++++++++
 zcl/zclsyntax/parser_test.go |  57 +++++++++++
 zcl/zclsyntax/public.go      |  11 ++-
 zcl/zclsyntax/token.go       |   5 +
 4 files changed, 251 insertions(+), 4 deletions(-)
 create mode 100644 zcl/zclsyntax/parser.go
 create mode 100644 zcl/zclsyntax/parser_test.go

diff --git a/zcl/zclsyntax/parser.go b/zcl/zclsyntax/parser.go
new file mode 100644
index 0000000..c8e372b
--- /dev/null
+++ b/zcl/zclsyntax/parser.go
@@ -0,0 +1,182 @@
+package zclsyntax
+
+import (
+	"fmt"
+
+	"github.com/zclconf/go-zcl/zcl"
+)
+
+// parser embeds a peeker and adds the parsing methods that consume its
+// token stream.
+type parser struct {
+	*peeker
+}
+
+// ParseBody parses body items from the token stream until it reaches a
+// token of type "end", which it also consumes. On a token that cannot start
+// a body item it records an error diagnostic, calls recover to skip past the
+// end of the body, and returns the items gathered up to that point.
+func (p *parser) ParseBody(end TokenType) (*Body, zcl.Diagnostics) {
+	attrs := Attributes{}
+	blocks := Blocks{}
+	var diags zcl.Diagnostics
+
+	startRange := p.NextRange()
+	var endRange zcl.Range
+
+Token:
+	for {
+		next := p.Peek()
+		if next.Type == end {
+			endRange = p.NextRange()
+			p.Read()
+			break Token
+		}
+
+		switch next.Type {
+		case TokenNewline:
+			p.Read()
+			continue
+		case TokenIdent, TokenOQuote:
+			item, itemDiags := p.ParseBodyItem()
+			diags = append(diags, itemDiags...)
+			switch titem := item.(type) {
+			case *Block:
+				blocks = append(blocks, titem)
+			case *Attribute:
+				if existing, exists := attrs[titem.Name]; exists {
+					diags = append(diags, &zcl.Diagnostic{
+						Severity: zcl.DiagError,
+						Summary:  "Attribute redefined",
+						Detail: fmt.Sprintf(
+							"The attribute %q was already defined at %s. Each attribute may be defined only once.",
+							titem.Name, existing.NameRange.String(),
+						),
+						Subject: &titem.NameRange,
+					})
+				} else {
+					attrs[titem.Name] = titem
+				}
+			default:
+				// This should never happen for valid input, but may if a
+				// syntax error was detected in ParseBodyItem that prevented
+				// it from even producing a partially-broken item. In that
+				// case, it would've left at least one error in the diagnostics
+				// slice we already dealt with above.
+				//
+				// We'll assume ParseBodyItem attempted recovery to leave
+				// us in a reasonable position to try parsing the next item.
+				continue
+			}
+		default:
+			bad := p.Read()
+			diags = append(diags, &zcl.Diagnostic{
+				Severity: zcl.DiagError,
+				Summary:  "Attribute or block definition required",
+				Detail:   "An attribute or block definition is required here.",
+				Subject:  &bad.Range,
+			})
+			endRange = p.NextRange() // arbitrary, but somewhere inside the body means better diagnostics
+
+			p.recover(end) // attempt to recover to the token after the end of this body
+			break Token
+		}
+	}
+
+	return &Body{
+		Attributes: attrs,
+		Blocks:     blocks,
+
+		SrcRange: zcl.RangeBetween(startRange, endRange),
+		EndRange: zcl.Range{
+			Filename: endRange.Filename,
+			Start:    endRange.End,
+			End:      endRange.End,
+		},
+	}, diags
+}
+
+// ParseBodyItem is a placeholder until attribute and block parsing is
+// implemented. It consumes a single token and reports it as an error, so
+// that ParseBody always makes progress and never sees a nil item without an
+// accompanying diagnostic.
+func (p *parser) ParseBodyItem() (Node, zcl.Diagnostics) {
+	tok := p.Read()
+	return nil, zcl.Diagnostics{
+		&zcl.Diagnostic{
+			Severity: zcl.DiagError,
+			Summary:  "Unsupported body item",
+			Detail:   "Parsing of attributes and blocks is not yet implemented.",
+			Subject:  &tok.Range,
+		},
+	}
+}
+
+// recover seeks forward in the token stream until it finds TokenType "end",
+// then returns with the peeker pointed at the following token.
+//
+// If the given token type is a bracketer, this function will additionally
+// count nested instances of the brackets to try to leave the peeker at
+// the end of the _current_ instance of that bracketer, skipping over any
+// nested instances. This is a best-effort operation and may have
+// unpredictable results on input with bad bracketer nesting.
+//
+// If TokenEOF is read before a matching "end" token is found, recovery
+// stops there, since no further tokens can follow the end of the input.
+func (p *parser) recover(end TokenType) {
+	start := p.oppositeBracket(end)
+
+	nest := 0
+	for {
+		tok := p.Read()
+		switch tok.Type {
+		case start:
+			nest++
+		case end:
+			if nest < 1 {
+				return
+			}
+
+			nest--
+		case TokenEOF:
+			// There is nothing left to skip once the input ends; stop
+			// here rather than keep reading.
+			return
+		}
+	}
+}
+
+// oppositeBracket finds the bracket that opposes the given bracketer, or
+// TokenNil if the given token isn't a bracketer.
+//
+// "Bracketer", for the sake of this function, is one end of a matching
+// open/close set of tokens that establish a bracketing context.
+func (p *parser) oppositeBracket(ty TokenType) TokenType {
+	switch ty {
+
+	case TokenOBrace:
+		return TokenCBrace
+	case TokenOBrack:
+		return TokenCBrack
+	case TokenOParen:
+		return TokenCParen
+	case TokenOQuote:
+		return TokenCQuote
+	case TokenOHeredoc:
+		return TokenCHeredoc
+
+	case TokenCBrace:
+		return TokenOBrace
+	case TokenCBrack:
+		return TokenOBrack
+	case TokenCParen:
+		return TokenOParen
+	case TokenCQuote:
+		return TokenOQuote
+	case TokenCHeredoc:
+		return TokenOHeredoc
+
+	default:
+		return TokenNil
+	}
+}
diff --git a/zcl/zclsyntax/parser_test.go b/zcl/zclsyntax/parser_test.go
new file mode 100644
index 0000000..ca1d9f5
--- /dev/null
+++ b/zcl/zclsyntax/parser_test.go
@@ -0,0 +1,57 @@
+package zclsyntax
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/kylelemons/godebug/pretty"
+	"github.com/zclconf/go-zcl/zcl"
+)
+
+func TestParseConfig(t *testing.T) {
+	tests := []struct {
+		input     string
+		diagCount int
+		want      *Body
+	}{
+		{
+			``,
+			0,
+			&Body{
+				Attributes: Attributes{},
+				Blocks:     Blocks{},
+				SrcRange: zcl.Range{
+					Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
+					End:   zcl.Pos{Line: 1, Column: 1, Byte: 0},
+				},
+				EndRange: zcl.Range{
+					Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
+					End:   zcl.Pos{Line: 1, Column: 1, Byte: 0},
+				},
+			},
+		},
+	}
+
+	prettyConfig := &pretty.Config{
+		Diffable:          true,
+		IncludeUnexported: true,
+		PrintStringers:    true,
+	}
+
+	for _, test := range tests {
+		t.Run(test.input, func(t *testing.T) {
+			got, diags := ParseConfig([]byte(test.input), "", zcl.Pos{Byte: 0, Line: 1, Column: 1})
+			if len(diags) != test.diagCount {
+				t.Errorf("wrong number of diagnostics %d; want %d", len(diags), test.diagCount)
+				for _, diag := range diags {
+					t.Logf(" - %s", diag.Error())
+				}
+			}
+
+			if !reflect.DeepEqual(got, test.want) {
+				diff := prettyConfig.Compare(test.want, got)
+				t.Errorf("wrong result\ninput: %s\ndiff: %s", test.input, diff)
+			}
+		})
+	}
+}
diff --git a/zcl/zclsyntax/public.go b/zcl/zclsyntax/public.go
index f44b578..362337f 100644
--- a/zcl/zclsyntax/public.go
+++ b/zcl/zclsyntax/public.go
@@ -8,19 +8,22 @@ import (
 // a Body representing its contents. If HasErrors called on the returned
 // diagnostics returns true, the returned body is likely to be incomplete
 // and should therefore be used with care.
-func ParseConfig(src []byte, filename string, start zcl.Pos) Body {
-	panic("ParseConfig is not yet implemented")
+func ParseConfig(src []byte, filename string, start zcl.Pos) (*Body, zcl.Diagnostics) {
+	tokens := LexConfig(src, filename, start)
+	peeker := newPeeker(tokens, false)
+	parser := &parser{peeker}
+	return parser.ParseBody(TokenEOF)
 }
 
 // ParseExpression parses the given buffer as a standalone zcl expression,
 // returning it as an instance of Expression.
-func ParseExpression(src []byte, filename string, start zcl.Pos) Expression {
+func ParseExpression(src []byte, filename string, start zcl.Pos) (*Expression, zcl.Diagnostics) {
 	panic("ParseExpression is not yet implemented")
 }
 
 // ParseTemplate parses the given buffer as a standalone zcl template,
 // returning it as an instance of Expression.
-func ParseTemplate(src []byte, filename string, start zcl.Pos) Expression {
+func ParseTemplate(src []byte, filename string, start zcl.Pos) (*Expression, zcl.Diagnostics) {
 	panic("ParseTemplate is not yet implemented")
 }
 
diff --git a/zcl/zclsyntax/token.go b/zcl/zclsyntax/token.go
index f163212..9a1cd3b 100644
--- a/zcl/zclsyntax/token.go
+++ b/zcl/zclsyntax/token.go
@@ -88,6 +88,11 @@ const (
 	TokenTabs TokenType = '␉'
 	TokenInvalid TokenType = '�'
 	TokenBadUTF8 TokenType = '💩'
+
+	// TokenNil is a placeholder for when a token is required but none is
+	// available, e.g. when reporting errors. The scanner will never produce
+	// this as part of a token stream.
+	TokenNil TokenType = '\x00'
 )
 
 func (t TokenType) GoString() string {