From 4bbfa6d6ab944367eb41ccafa15dabfa99f68b49 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Sat, 20 May 2017 13:32:12 -0700 Subject: [PATCH] json: recovery behavior for the parser When using the parser to do static analysis and editor integrations, it's still useful to be able to get the incomplete AST resulting from a parse error. Callers that intend to use the returned value to take real actions (as opposed to just analysis) must check diags.HasError() to determine if the returned file can be considered valid. --- zcl/json/ast.go | 15 +++++ zcl/json/parser.go | 101 ++++++++++++++++++++++++++++----- zcl/json/parser_test.go | 121 +++++++++++++++++++++++++++++++++------- zcl/json/public.go | 44 ++++++++++----- zcl/json/public_test.go | 10 +++- zcl/json/scanner.go | 3 +- 6 files changed, 244 insertions(+), 50 deletions(-) diff --git a/zcl/json/ast.go b/zcl/json/ast.go index 88638ea..dab1a98 100644 --- a/zcl/json/ast.go +++ b/zcl/json/ast.go @@ -103,3 +103,18 @@ func (n *nullVal) Range() zcl.Range { func (n *nullVal) StartRange() zcl.Range { return n.SrcRange } + +// invalidVal is used as a placeholder where a value is needed for a valid +// parse tree but the input was invalid enough to prevent one from being +// created. +type invalidVal struct { + SrcRange zcl.Range +} + +func (n invalidVal) Range() zcl.Range { + return n.SrcRange +} + +func (n invalidVal) StartRange() zcl.Range { + return n.SrcRange +} diff --git a/zcl/json/parser.go b/zcl/json/parser.go index 7c0981f..01cd56a 100644 --- a/zcl/json/parser.go +++ b/zcl/json/parser.go @@ -33,53 +33,60 @@ func parseFileContent(buf []byte, filename string) (node, zcl.Diagnostics) { func parseValue(p *peeker) (node, zcl.Diagnostics) { tok := p.Peek() + wrapInvalid := func(n node, diags zcl.Diagnostics) (node, zcl.Diagnostics) { + if n != nil { + return n, diags + } + return invalidVal{tok.Range}, diags + } + switch tok.Type { case tokenBraceO: - return parseObject(p) + return wrapInvalid(parseObject(p)) case tokenBrackO: - return parseArray(p) + return wrapInvalid(parseArray(p)) case tokenNumber: - return parseNumber(p) + return wrapInvalid(parseNumber(p)) case tokenString: - return parseString(p) + return wrapInvalid(parseString(p)) case tokenKeyword: - return parseKeyword(p) + return wrapInvalid(parseKeyword(p)) case tokenBraceC: - return nil, zcl.Diagnostics{ + return wrapInvalid(nil, zcl.Diagnostics{ { Severity: zcl.DiagError, Summary: "Missing attribute value", Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.", Subject: &tok.Range, }, - } + }) case tokenBrackC: - return nil, zcl.Diagnostics{ + return wrapInvalid(nil, zcl.Diagnostics{ { Severity: zcl.DiagError, Summary: "Missing array element value", Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.", Subject: &tok.Range, }, - } + }) case tokenEOF: - return nil, zcl.Diagnostics{ + return wrapInvalid(nil, zcl.Diagnostics{ { Severity: zcl.DiagError, Summary: "Missing value", Detail: "The JSON data ends prematurely.", Subject: &tok.Range, }, - } + }) default: - return nil, zcl.Diagnostics{ + return wrapInvalid(nil, zcl.Diagnostics{ { Severity: zcl.DiagError, Summary: "Invalid start of value", Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.", Subject: &tok.Range, }, - } + }) } } @@ -98,6 +105,29 @@ func parseObject(p *peeker) (node, zcl.Diagnostics) { open := p.Read() attrs := map[string]*objectAttr{} + // recover is used to shift the peeker to what seems to be the end of + // our object, so that when we encounter an error we leave the peeker + // at a reasonable point in the token stream to continue parsing. + recover := func(tok token) { + open := 1 + for { + switch tok.Type { + case tokenBraceO: + open++ + case tokenBraceC: + open-- + if open <= 1 { + return + } + case tokenEOF: + // Ran out of source before we were able to recover, + // so we'll bail here and let the caller deal with it. + return + } + tok = p.Read() + } + } + Token: for { if p.Peek().Type == tokenBraceC { @@ -124,9 +154,11 @@ Token: colon := p.Read() if colon.Type != tokenColon { + recover(colon) + if colon.Type == tokenBraceC || colon.Type == tokenComma { // Catch common mistake of using braces instead of brackets - // for an array. + // for an object. return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Missing object value", @@ -135,6 +167,16 @@ Token: }) } + if colon.Type == tokenEquals { + // Possible confusion with native zcl syntax. + return nil, diags.Append(&zcl.Diagnostic{ + Severity: zcl.DiagError, + Summary: "Missing attribute value colon", + Detail: "JSON uses a colon as its name/value delimiter, not an equals sign.", + Subject: &colon.Range, + }) + } + return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Missing attribute value colon", @@ -189,6 +231,9 @@ Token: Subject: &open.Range, }) case tokenBrackC: + // Consume the bracket anyway, so that we don't return with the peeker + // at a strange place. + p.Read() return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Mismatched braces", @@ -198,6 +243,7 @@ Token: case tokenBraceC: break Token default: + recover(p.Read()) return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Missing attribute seperator comma", @@ -222,6 +268,29 @@ func parseArray(p *peeker) (node, zcl.Diagnostics) { open := p.Read() vals := []node{} + // recover is used to shift the peeker to what seems to be the end of + // our array, so that when we encounter an error we leave the peeker + // at a reasonable point in the token stream to continue parsing. + recover := func(tok token) { + open := 1 + for { + switch tok.Type { + case tokenBrackO: + open++ + case tokenBrackC: + open-- + if open <= 1 { + return + } + case tokenEOF: + // Ran out of source before we were able to recover, + // so we'll bail here and let the caller deal with it. + return + } + tok = p.Read() + } + } + Token: for { if p.Peek().Type == tokenBrackC { @@ -250,6 +319,7 @@ Token: } continue Token case tokenColon: + recover(p.Read()) return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Invalid array value", @@ -257,6 +327,7 @@ Token: Subject: p.Peek().Range.Ptr(), }) case tokenEOF: + recover(p.Read()) return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Unclosed object", @@ -264,6 +335,7 @@ Token: Subject: &open.Range, }) case tokenBraceC: + recover(p.Read()) return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Mismatched brackets", @@ -273,6 +345,7 @@ Token: case tokenBrackC: break Token default: + recover(p.Read()) return nil, diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Missing attribute seperator comma", diff --git a/zcl/json/parser_test.go b/zcl/json/parser_test.go index 62e5fd0..fdfd944 100644 --- a/zcl/json/parser_test.go +++ b/zcl/json/parser_test.go @@ -50,12 +50,18 @@ func TestParse(t *testing.T) { }, { `undefined`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 10, Byte: 9}, + }}, 1, }, { `flase`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 6, Byte: 5}, + }}, 1, }, { @@ -104,12 +110,18 @@ func TestParse(t *testing.T) { }, { `"hello`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 7, Byte: 6}, + }}, 1, }, { `"he\llo"`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 9, Byte: 8}, + }}, 1, }, { @@ -180,12 +192,18 @@ func TestParse(t *testing.T) { }, { `.1`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 3, Byte: 2}, + }}, 1, }, { `+2`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 3, Byte: 2}, + }}, 1, }, { @@ -292,22 +310,34 @@ func TestParse(t *testing.T) { }, { `{"hello":true`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `{"hello":true]`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `{"hello":true,}`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `{true:false}`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { @@ -342,12 +372,18 @@ func TestParse(t *testing.T) { }, { `{"hello": true, "hello": true, "hello", true}`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 2, }, { `{"hello", "world"}`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { @@ -447,37 +483,82 @@ func TestParse(t *testing.T) { }, { `[`, - nil, - 1, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, + 2, }, { `[true`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `]`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `[true,]`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `[[],]`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `["hello":true]`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, { `[true}`, - nil, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, + 1, + }, + { + `{"wrong"=true}`, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, + 1, + }, + { + `{"wrong" = true}`, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, + 1, + }, + { + `{"wrong" true}`, + invalidVal{zcl.Range{ + Start: zcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: zcl.Pos{Line: 1, Column: 2, Byte: 1}, + }}, 1, }, } diff --git a/zcl/json/public.go b/zcl/json/public.go index 09b17b2..894d16f 100644 --- a/zcl/json/public.go +++ b/zcl/json/public.go @@ -14,28 +14,46 @@ import ( // This is not a generic JSON parser. Instead, it deals only with the profile // of JSON used to express zcl configuration. // -// The returned file is valid if it is non-nil, regardless of whether the -// diagnostics are also non-nil. If both are returned, the diagnostics should -// still be presented to the user because they may contain warnings. +// The returned file is valid only if the returned diagnostics returns false +// from its HasErrors method. If HasErrors returns true, the file represents +// the subset of data that was able to be parsed, which may be none. func Parse(src []byte, filename string) (*zcl.File, zcl.Diagnostics) { - var file *zcl.File rootNode, diags := parseFileContent(src, filename) if _, ok := rootNode.(*objectVal); !ok { - return nil, diags.Append(&zcl.Diagnostic{ + diags = diags.Append(&zcl.Diagnostic{ Severity: zcl.DiagError, Summary: "Root value must be object", Detail: "The root value in a JSON-based configuration must be a JSON object.", Subject: rootNode.StartRange().Ptr(), }) - } - if rootNode != nil { - file = &zcl.File{ - Body: &body{ - obj: rootNode.(*objectVal), - }, - Bytes: src, - Nav: navigation{rootNode.(*objectVal)}, + // Put in a placeholder objectVal just so the caller always gets + // a valid file, even if it appears empty. This is useful for callers + // that are doing static analysis of possibly-erroneous source code, + // which will try to process the returned file even if we return + // diagnostics of severity error. This way, they'll get a file that + // has an empty body rather than a body that panics when probed. + fakePos := zcl.Pos{ + Byte: 0, + Line: 1, + Column: 1, } + fakeRange := zcl.Range{ + Filename: filename, + Start: fakePos, + End: fakePos, + } + rootNode = &objectVal{ + Attrs: map[string]*objectAttr{}, + SrcRange: fakeRange, + OpenRange: fakeRange, + } + } + file := &zcl.File{ + Body: &body{ + obj: rootNode.(*objectVal), + }, + Bytes: src, + Nav: navigation{rootNode.(*objectVal)}, } return file, diags } diff --git a/zcl/json/public_test.go b/zcl/json/public_test.go index 4949764..ef6fa8c 100644 --- a/zcl/json/public_test.go +++ b/zcl/json/public_test.go @@ -10,7 +10,13 @@ func TestParse_nonObject(t *testing.T) { if len(diags) != 1 { t.Errorf("got %d diagnostics; want 1", len(diags)) } - if file != nil { - t.Errorf("got non-nil File; want nil") + if file == nil { + t.Errorf("got nil File; want actual file") + } + if file.Body == nil { + t.Fatalf("got nil Body; want actual body") + } + if file.Body.(*body).obj == nil { + t.Errorf("got nil Body object; want placeholder object") } } diff --git a/zcl/json/scanner.go b/zcl/json/scanner.go index 7367e2f..58e316c 100644 --- a/zcl/json/scanner.go +++ b/zcl/json/scanner.go @@ -21,6 +21,7 @@ const ( tokenNumber tokenType = 'N' tokenEOF tokenType = '␄' tokenInvalid tokenType = 0 + tokenEquals tokenType = '=' // used only for reminding the user of JSON syntax ) type token struct { @@ -64,7 +65,7 @@ func scan(buf []byte, start pos) []token { first := buf[0] switch { - case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':': + case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=': p.Pos.Column++ p.Pos.Byte++ tokens = append(tokens, token{