json: recovery behavior for the parser

When using the parser to do static analysis and editor integrations, it's
still useful to be able to get the incomplete AST resulting from a parse
error.

Callers that intend to use the returned value to take real actions (as
opposed to just analysis) must check diags.HasError() to determine if
the returned file can be considered valid.
This commit is contained in:
Martin Atkins 2017-05-20 13:32:12 -07:00
parent 341223612b
commit 4bbfa6d6ab
6 changed files with 244 additions and 50 deletions

View File

@ -103,3 +103,18 @@ func (n *nullVal) Range() zcl.Range {
func (n *nullVal) StartRange() zcl.Range {
return n.SrcRange
}
// invalidVal is used as a placeholder where a value is needed for a valid
// parse tree but the input was invalid enough to prevent one from being
// created.
type invalidVal struct {
SrcRange zcl.Range
}
func (n invalidVal) Range() zcl.Range {
return n.SrcRange
}
func (n invalidVal) StartRange() zcl.Range {
return n.SrcRange
}

View File

@ -33,53 +33,60 @@ func parseFileContent(buf []byte, filename string) (node, zcl.Diagnostics) {
func parseValue(p *peeker) (node, zcl.Diagnostics) {
tok := p.Peek()
wrapInvalid := func(n node, diags zcl.Diagnostics) (node, zcl.Diagnostics) {
if n != nil {
return n, diags
}
return invalidVal{tok.Range}, diags
}
switch tok.Type {
case tokenBraceO:
return parseObject(p)
return wrapInvalid(parseObject(p))
case tokenBrackO:
return parseArray(p)
return wrapInvalid(parseArray(p))
case tokenNumber:
return parseNumber(p)
return wrapInvalid(parseNumber(p))
case tokenString:
return parseString(p)
return wrapInvalid(parseString(p))
case tokenKeyword:
return parseKeyword(p)
return wrapInvalid(parseKeyword(p))
case tokenBraceC:
return nil, zcl.Diagnostics{
return wrapInvalid(nil, zcl.Diagnostics{
{
Severity: zcl.DiagError,
Summary: "Missing attribute value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range,
},
}
})
case tokenBrackC:
return nil, zcl.Diagnostics{
return wrapInvalid(nil, zcl.Diagnostics{
{
Severity: zcl.DiagError,
Summary: "Missing array element value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range,
},
}
})
case tokenEOF:
return nil, zcl.Diagnostics{
return wrapInvalid(nil, zcl.Diagnostics{
{
Severity: zcl.DiagError,
Summary: "Missing value",
Detail: "The JSON data ends prematurely.",
Subject: &tok.Range,
},
}
})
default:
return nil, zcl.Diagnostics{
return wrapInvalid(nil, zcl.Diagnostics{
{
Severity: zcl.DiagError,
Summary: "Invalid start of value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range,
},
}
})
}
}
@ -98,6 +105,29 @@ func parseObject(p *peeker) (node, zcl.Diagnostics) {
open := p.Read()
attrs := map[string]*objectAttr{}
// recover is used to shift the peeker to what seems to be the end of
// our object, so that when we encounter an error we leave the peeker
// at a reasonable point in the token stream to continue parsing.
recover := func(tok token) {
open := 1
for {
switch tok.Type {
case tokenBraceO:
open++
case tokenBraceC:
open--
if open <= 1 {
return
}
case tokenEOF:
// Ran out of source before we were able to recover,
// so we'll bail here and let the caller deal with it.
return
}
tok = p.Read()
}
}
Token:
for {
if p.Peek().Type == tokenBraceC {
@ -124,9 +154,11 @@ Token:
colon := p.Read()
if colon.Type != tokenColon {
recover(colon)
if colon.Type == tokenBraceC || colon.Type == tokenComma {
// Catch common mistake of using braces instead of brackets
// for an array.
// for an object.
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Missing object value",
@ -135,6 +167,16 @@ Token:
})
}
if colon.Type == tokenEquals {
// Possible confusion with native zcl syntax.
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Missing attribute value colon",
Detail: "JSON uses a colon as its name/value delimiter, not an equals sign.",
Subject: &colon.Range,
})
}
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Missing attribute value colon",
@ -189,6 +231,9 @@ Token:
Subject: &open.Range,
})
case tokenBrackC:
// Consume the bracket anyway, so that we don't return with the peeker
// at a strange place.
p.Read()
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Mismatched braces",
@ -198,6 +243,7 @@ Token:
case tokenBraceC:
break Token
default:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Missing attribute seperator comma",
@ -222,6 +268,29 @@ func parseArray(p *peeker) (node, zcl.Diagnostics) {
open := p.Read()
vals := []node{}
// recover is used to shift the peeker to what seems to be the end of
// our array, so that when we encounter an error we leave the peeker
// at a reasonable point in the token stream to continue parsing.
recover := func(tok token) {
open := 1
for {
switch tok.Type {
case tokenBrackO:
open++
case tokenBrackC:
open--
if open <= 1 {
return
}
case tokenEOF:
// Ran out of source before we were able to recover,
// so we'll bail here and let the caller deal with it.
return
}
tok = p.Read()
}
}
Token:
for {
if p.Peek().Type == tokenBrackC {
@ -250,6 +319,7 @@ Token:
}
continue Token
case tokenColon:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Invalid array value",
@ -257,6 +327,7 @@ Token:
Subject: p.Peek().Range.Ptr(),
})
case tokenEOF:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Unclosed object",
@ -264,6 +335,7 @@ Token:
Subject: &open.Range,
})
case tokenBraceC:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Mismatched brackets",
@ -273,6 +345,7 @@ Token:
case tokenBrackC:
break Token
default:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Missing attribute seperator comma",

View File

@ -50,12 +50,18 @@ func TestParse(t *testing.T) {
},
{
`undefined`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 10, Byte: 9},
}},
1,
},
{
`flase`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 6, Byte: 5},
}},
1,
},
{
@ -104,12 +110,18 @@ func TestParse(t *testing.T) {
},
{
`"hello`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 7, Byte: 6},
}},
1,
},
{
`"he\llo"`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 9, Byte: 8},
}},
1,
},
{
@ -180,12 +192,18 @@ func TestParse(t *testing.T) {
},
{
`.1`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 3, Byte: 2},
}},
1,
},
{
`+2`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 3, Byte: 2},
}},
1,
},
{
@ -292,22 +310,34 @@ func TestParse(t *testing.T) {
},
{
`{"hello":true`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"hello":true]`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"hello":true,}`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{true:false}`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
@ -342,12 +372,18 @@ func TestParse(t *testing.T) {
},
{
`{"hello": true, "hello": true, "hello", true}`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
2,
},
{
`{"hello", "world"}`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
@ -447,37 +483,82 @@ func TestParse(t *testing.T) {
},
{
`[`,
nil,
1,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
2,
},
{
`[true`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`]`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`[true,]`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`[[],]`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`["hello":true]`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`[true}`,
nil,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"wrong"=true}`,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"wrong" = true}`,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"wrong" true}`,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
}

View File

@ -14,28 +14,46 @@ import (
// This is not a generic JSON parser. Instead, it deals only with the profile
// of JSON used to express zcl configuration.
//
// The returned file is valid if it is non-nil, regardless of whether the
// diagnostics are also non-nil. If both are returned, the diagnostics should
// still be presented to the user because they may contain warnings.
// The returned file is valid only if the returned diagnostics returns false
// from its HasErrors method. If HasErrors returns true, the file represents
// the subset of data that was able to be parsed, which may be none.
func Parse(src []byte, filename string) (*zcl.File, zcl.Diagnostics) {
var file *zcl.File
rootNode, diags := parseFileContent(src, filename)
if _, ok := rootNode.(*objectVal); !ok {
return nil, diags.Append(&zcl.Diagnostic{
diags = diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Root value must be object",
Detail: "The root value in a JSON-based configuration must be a JSON object.",
Subject: rootNode.StartRange().Ptr(),
})
}
if rootNode != nil {
file = &zcl.File{
Body: &body{
obj: rootNode.(*objectVal),
},
Bytes: src,
Nav: navigation{rootNode.(*objectVal)},
// Put in a placeholder objectVal just so the caller always gets
// a valid file, even if it appears empty. This is useful for callers
// that are doing static analysis of possibly-erroneous source code,
// which will try to process the returned file even if we return
// diagnostics of severity error. This way, they'll get a file that
// has an empty body rather than a body that panics when probed.
fakePos := zcl.Pos{
Byte: 0,
Line: 1,
Column: 1,
}
fakeRange := zcl.Range{
Filename: filename,
Start: fakePos,
End: fakePos,
}
rootNode = &objectVal{
Attrs: map[string]*objectAttr{},
SrcRange: fakeRange,
OpenRange: fakeRange,
}
}
file := &zcl.File{
Body: &body{
obj: rootNode.(*objectVal),
},
Bytes: src,
Nav: navigation{rootNode.(*objectVal)},
}
return file, diags
}

View File

@ -10,7 +10,13 @@ func TestParse_nonObject(t *testing.T) {
if len(diags) != 1 {
t.Errorf("got %d diagnostics; want 1", len(diags))
}
if file != nil {
t.Errorf("got non-nil File; want nil")
if file == nil {
t.Errorf("got nil File; want actual file")
}
if file.Body == nil {
t.Fatalf("got nil Body; want actual body")
}
if file.Body.(*body).obj == nil {
t.Errorf("got nil Body object; want placeholder object")
}
}

View File

@ -21,6 +21,7 @@ const (
tokenNumber tokenType = 'N'
tokenEOF tokenType = '␄'
tokenInvalid tokenType = 0
tokenEquals tokenType = '=' // used only for reminding the user of JSON syntax
)
type token struct {
@ -64,7 +65,7 @@ func scan(buf []byte, start pos) []token {
first := buf[0]
switch {
case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':':
case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=':
p.Pos.Column++
p.Pos.Byte++
tokens = append(tokens, token{