json: recovery behavior for the parser

When using the parser to do static analysis and editor integrations, it's
still useful to be able to get the incomplete AST resulting from a parse
error.

Callers that intend to use the returned value to take real actions (as
opposed to just analysis) must check diags.HasError() to determine if
the returned file can be considered valid.
This commit is contained in:
Martin Atkins 2017-05-20 13:32:12 -07:00
parent 341223612b
commit 4bbfa6d6ab
6 changed files with 244 additions and 50 deletions

View File

@ -103,3 +103,18 @@ func (n *nullVal) Range() zcl.Range {
func (n *nullVal) StartRange() zcl.Range { func (n *nullVal) StartRange() zcl.Range {
return n.SrcRange return n.SrcRange
} }
// invalidVal is used as a placeholder where a value is needed for a valid
// parse tree but the input was invalid enough to prevent one from being
// created.
type invalidVal struct {
SrcRange zcl.Range
}
func (n invalidVal) Range() zcl.Range {
return n.SrcRange
}
func (n invalidVal) StartRange() zcl.Range {
return n.SrcRange
}

View File

@ -33,53 +33,60 @@ func parseFileContent(buf []byte, filename string) (node, zcl.Diagnostics) {
func parseValue(p *peeker) (node, zcl.Diagnostics) { func parseValue(p *peeker) (node, zcl.Diagnostics) {
tok := p.Peek() tok := p.Peek()
wrapInvalid := func(n node, diags zcl.Diagnostics) (node, zcl.Diagnostics) {
if n != nil {
return n, diags
}
return invalidVal{tok.Range}, diags
}
switch tok.Type { switch tok.Type {
case tokenBraceO: case tokenBraceO:
return parseObject(p) return wrapInvalid(parseObject(p))
case tokenBrackO: case tokenBrackO:
return parseArray(p) return wrapInvalid(parseArray(p))
case tokenNumber: case tokenNumber:
return parseNumber(p) return wrapInvalid(parseNumber(p))
case tokenString: case tokenString:
return parseString(p) return wrapInvalid(parseString(p))
case tokenKeyword: case tokenKeyword:
return parseKeyword(p) return wrapInvalid(parseKeyword(p))
case tokenBraceC: case tokenBraceC:
return nil, zcl.Diagnostics{ return wrapInvalid(nil, zcl.Diagnostics{
{ {
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Missing attribute value", Summary: "Missing attribute value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.", Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range, Subject: &tok.Range,
}, },
} })
case tokenBrackC: case tokenBrackC:
return nil, zcl.Diagnostics{ return wrapInvalid(nil, zcl.Diagnostics{
{ {
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Missing array element value", Summary: "Missing array element value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.", Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range, Subject: &tok.Range,
}, },
} })
case tokenEOF: case tokenEOF:
return nil, zcl.Diagnostics{ return wrapInvalid(nil, zcl.Diagnostics{
{ {
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Missing value", Summary: "Missing value",
Detail: "The JSON data ends prematurely.", Detail: "The JSON data ends prematurely.",
Subject: &tok.Range, Subject: &tok.Range,
}, },
} })
default: default:
return nil, zcl.Diagnostics{ return wrapInvalid(nil, zcl.Diagnostics{
{ {
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Invalid start of value", Summary: "Invalid start of value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.", Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range, Subject: &tok.Range,
}, },
} })
} }
} }
@ -98,6 +105,29 @@ func parseObject(p *peeker) (node, zcl.Diagnostics) {
open := p.Read() open := p.Read()
attrs := map[string]*objectAttr{} attrs := map[string]*objectAttr{}
// recover is used to shift the peeker to what seems to be the end of
// our object, so that when we encounter an error we leave the peeker
// at a reasonable point in the token stream to continue parsing.
recover := func(tok token) {
open := 1
for {
switch tok.Type {
case tokenBraceO:
open++
case tokenBraceC:
open--
if open <= 1 {
return
}
case tokenEOF:
// Ran out of source before we were able to recover,
// so we'll bail here and let the caller deal with it.
return
}
tok = p.Read()
}
}
Token: Token:
for { for {
if p.Peek().Type == tokenBraceC { if p.Peek().Type == tokenBraceC {
@ -124,9 +154,11 @@ Token:
colon := p.Read() colon := p.Read()
if colon.Type != tokenColon { if colon.Type != tokenColon {
recover(colon)
if colon.Type == tokenBraceC || colon.Type == tokenComma { if colon.Type == tokenBraceC || colon.Type == tokenComma {
// Catch common mistake of using braces instead of brackets // Catch common mistake of using braces instead of brackets
// for an array. // for an object.
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Missing object value", Summary: "Missing object value",
@ -135,6 +167,16 @@ Token:
}) })
} }
if colon.Type == tokenEquals {
// Possible confusion with native zcl syntax.
return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError,
Summary: "Missing attribute value colon",
Detail: "JSON uses a colon as its name/value delimiter, not an equals sign.",
Subject: &colon.Range,
})
}
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Missing attribute value colon", Summary: "Missing attribute value colon",
@ -189,6 +231,9 @@ Token:
Subject: &open.Range, Subject: &open.Range,
}) })
case tokenBrackC: case tokenBrackC:
// Consume the bracket anyway, so that we don't return with the peeker
// at a strange place.
p.Read()
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Mismatched braces", Summary: "Mismatched braces",
@ -198,6 +243,7 @@ Token:
case tokenBraceC: case tokenBraceC:
break Token break Token
default: default:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Missing attribute seperator comma", Summary: "Missing attribute seperator comma",
@ -222,6 +268,29 @@ func parseArray(p *peeker) (node, zcl.Diagnostics) {
open := p.Read() open := p.Read()
vals := []node{} vals := []node{}
// recover is used to shift the peeker to what seems to be the end of
// our array, so that when we encounter an error we leave the peeker
// at a reasonable point in the token stream to continue parsing.
recover := func(tok token) {
open := 1
for {
switch tok.Type {
case tokenBrackO:
open++
case tokenBrackC:
open--
if open <= 1 {
return
}
case tokenEOF:
// Ran out of source before we were able to recover,
// so we'll bail here and let the caller deal with it.
return
}
tok = p.Read()
}
}
Token: Token:
for { for {
if p.Peek().Type == tokenBrackC { if p.Peek().Type == tokenBrackC {
@ -250,6 +319,7 @@ Token:
} }
continue Token continue Token
case tokenColon: case tokenColon:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Invalid array value", Summary: "Invalid array value",
@ -257,6 +327,7 @@ Token:
Subject: p.Peek().Range.Ptr(), Subject: p.Peek().Range.Ptr(),
}) })
case tokenEOF: case tokenEOF:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Unclosed object", Summary: "Unclosed object",
@ -264,6 +335,7 @@ Token:
Subject: &open.Range, Subject: &open.Range,
}) })
case tokenBraceC: case tokenBraceC:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Mismatched brackets", Summary: "Mismatched brackets",
@ -273,6 +345,7 @@ Token:
case tokenBrackC: case tokenBrackC:
break Token break Token
default: default:
recover(p.Read())
return nil, diags.Append(&zcl.Diagnostic{ return nil, diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Missing attribute seperator comma", Summary: "Missing attribute seperator comma",

View File

@ -50,12 +50,18 @@ func TestParse(t *testing.T) {
}, },
{ {
`undefined`, `undefined`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 10, Byte: 9},
}},
1, 1,
}, },
{ {
`flase`, `flase`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 6, Byte: 5},
}},
1, 1,
}, },
{ {
@ -104,12 +110,18 @@ func TestParse(t *testing.T) {
}, },
{ {
`"hello`, `"hello`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 7, Byte: 6},
}},
1, 1,
}, },
{ {
`"he\llo"`, `"he\llo"`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 9, Byte: 8},
}},
1, 1,
}, },
{ {
@ -180,12 +192,18 @@ func TestParse(t *testing.T) {
}, },
{ {
`.1`, `.1`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 3, Byte: 2},
}},
1, 1,
}, },
{ {
`+2`, `+2`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 3, Byte: 2},
}},
1, 1,
}, },
{ {
@ -292,22 +310,34 @@ func TestParse(t *testing.T) {
}, },
{ {
`{"hello":true`, `{"hello":true`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`{"hello":true]`, `{"hello":true]`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`{"hello":true,}`, `{"hello":true,}`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`{true:false}`, `{true:false}`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
@ -342,12 +372,18 @@ func TestParse(t *testing.T) {
}, },
{ {
`{"hello": true, "hello": true, "hello", true}`, `{"hello": true, "hello": true, "hello", true}`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
2, 2,
}, },
{ {
`{"hello", "world"}`, `{"hello", "world"}`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
@ -447,37 +483,82 @@ func TestParse(t *testing.T) {
}, },
{ {
`[`, `[`,
nil, invalidVal{zcl.Range{
1, Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
2,
}, },
{ {
`[true`, `[true`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`]`, `]`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`[true,]`, `[true,]`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`[[],]`, `[[],]`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`["hello":true]`, `["hello":true]`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
{ {
`[true}`, `[true}`,
nil, invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"wrong"=true}`,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"wrong" = true}`,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1,
},
{
`{"wrong" true}`,
invalidVal{zcl.Range{
Start: zcl.Pos{Line: 1, Column: 1, Byte: 0},
End: zcl.Pos{Line: 1, Column: 2, Byte: 1},
}},
1, 1,
}, },
} }

View File

@ -14,28 +14,46 @@ import (
// This is not a generic JSON parser. Instead, it deals only with the profile // This is not a generic JSON parser. Instead, it deals only with the profile
// of JSON used to express zcl configuration. // of JSON used to express zcl configuration.
// //
// The returned file is valid if it is non-nil, regardless of whether the // The returned file is valid only if the returned diagnostics returns false
// diagnostics are also non-nil. If both are returned, the diagnostics should // from its HasErrors method. If HasErrors returns true, the file represents
// still be presented to the user because they may contain warnings. // the subset of data that was able to be parsed, which may be none.
func Parse(src []byte, filename string) (*zcl.File, zcl.Diagnostics) { func Parse(src []byte, filename string) (*zcl.File, zcl.Diagnostics) {
var file *zcl.File
rootNode, diags := parseFileContent(src, filename) rootNode, diags := parseFileContent(src, filename)
if _, ok := rootNode.(*objectVal); !ok { if _, ok := rootNode.(*objectVal); !ok {
return nil, diags.Append(&zcl.Diagnostic{ diags = diags.Append(&zcl.Diagnostic{
Severity: zcl.DiagError, Severity: zcl.DiagError,
Summary: "Root value must be object", Summary: "Root value must be object",
Detail: "The root value in a JSON-based configuration must be a JSON object.", Detail: "The root value in a JSON-based configuration must be a JSON object.",
Subject: rootNode.StartRange().Ptr(), Subject: rootNode.StartRange().Ptr(),
}) })
} // Put in a placeholder objectVal just so the caller always gets
if rootNode != nil { // a valid file, even if it appears empty. This is useful for callers
file = &zcl.File{ // that are doing static analysis of possibly-erroneous source code,
Body: &body{ // which will try to process the returned file even if we return
obj: rootNode.(*objectVal), // diagnostics of severity error. This way, they'll get a file that
}, // has an empty body rather than a body that panics when probed.
Bytes: src, fakePos := zcl.Pos{
Nav: navigation{rootNode.(*objectVal)}, Byte: 0,
Line: 1,
Column: 1,
} }
fakeRange := zcl.Range{
Filename: filename,
Start: fakePos,
End: fakePos,
}
rootNode = &objectVal{
Attrs: map[string]*objectAttr{},
SrcRange: fakeRange,
OpenRange: fakeRange,
}
}
file := &zcl.File{
Body: &body{
obj: rootNode.(*objectVal),
},
Bytes: src,
Nav: navigation{rootNode.(*objectVal)},
} }
return file, diags return file, diags
} }

View File

@ -10,7 +10,13 @@ func TestParse_nonObject(t *testing.T) {
if len(diags) != 1 { if len(diags) != 1 {
t.Errorf("got %d diagnostics; want 1", len(diags)) t.Errorf("got %d diagnostics; want 1", len(diags))
} }
if file != nil { if file == nil {
t.Errorf("got non-nil File; want nil") t.Errorf("got nil File; want actual file")
}
if file.Body == nil {
t.Fatalf("got nil Body; want actual body")
}
if file.Body.(*body).obj == nil {
t.Errorf("got nil Body object; want placeholder object")
} }
} }

View File

@ -21,6 +21,7 @@ const (
tokenNumber tokenType = 'N' tokenNumber tokenType = 'N'
tokenEOF tokenType = '␄' tokenEOF tokenType = '␄'
tokenInvalid tokenType = 0 tokenInvalid tokenType = 0
tokenEquals tokenType = '=' // used only for reminding the user of JSON syntax
) )
type token struct { type token struct {
@ -64,7 +65,7 @@ func scan(buf []byte, start pos) []token {
first := buf[0] first := buf[0]
switch { switch {
case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':': case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=':
p.Pos.Column++ p.Pos.Column++
p.Pos.Byte++ p.Pos.Byte++
tokens = append(tokens, token{ tokens = append(tokens, token{