Skip to content

Commit 0baecbe

Browse files
authored
Make comma-list parsing really robust (#403)
This PR adds more diagnostics for comma-delimited list of types and expressions, and ensures that commas are optional pretty much everywhere for generating a valid AST, while at the same time diagnosing all missing and extraneous commas. This ensures a match with the behavior advertised in #393. Includes a test that exercises various corner cases. This PR also adds `Span.RuneRange` and `Span.Rune`, to aid in constructing suggestion spans.
1 parent b5374ec commit 0baecbe

File tree

12 files changed

+901
-30
lines changed

12 files changed

+901
-30
lines changed

experimental/ast/options.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ type Option struct {
4646
Value ExprAny
4747
}
4848

49+
// Span implements [report.Spanner].
50+
func (o Option) Span() report.Span {
51+
return report.Join(o.Path, o.Equals, o.Value)
52+
}
53+
4954
type rawOption struct {
5055
path rawPath
5156
equals token.ID

experimental/parser/diagnostics_internal.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ func (e errUnexpected) Diagnose(d *report.Diagnostic) {
4242
got := e.got
4343
if got == nil {
4444
got = taxa.Classify(e.what)
45+
if got == taxa.Unknown {
46+
got = "tokens"
47+
}
4548
}
4649

4750
var message report.DiagnosticOption

experimental/parser/parse_decl.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ type exprComma struct {
2828
comma token.Token
2929
}
3030

31+
func (e exprComma) Span() report.Span {
32+
return e.expr.Span()
33+
}
34+
3135
// parseDecl parses any Protobuf declaration.
3236
//
3337
// This function will always advance cursor if it is not empty.
@@ -291,6 +295,7 @@ func parseRange(p *parser, c *token.Cursor) ast.DeclRange {
291295
// is empty, or if the first comma occurs without seeing an =, we can choose
292296
// to parse this as an array, instead.
293297
if !canStartOptions(c.Peek()) {
298+
var last token.Token
294299
delimited[ast.ExprAny]{
295300
p: p, c: c,
296301
what: taxa.Expr,
@@ -299,11 +304,45 @@ func parseRange(p *parser, c *token.Cursor) ast.DeclRange {
299304
required: true,
300305
exhaust: false,
301306
parse: func(c *token.Cursor) (ast.ExprAny, bool) {
307+
last = c.Peek()
302308
expr := parseExpr(p, c, in.In())
303309
badExpr = expr.IsZero()
304310

305311
return expr, !expr.IsZero()
306312
},
313+
start: canStartExpr,
314+
stop: func(t token.Token) bool {
315+
if t.Text() == ";" || t.Text() == "[" {
316+
return true
317+
}
318+
319+
// After the first element, stop if we see an identifier
320+
// coming up. This is for a case like this:
321+
//
322+
// reserved 1, 2
323+
// message Foo {}
324+
//
325+
// If we don't do this, message will be interpreted as an
326+
// expression.
327+
if !last.IsZero() && t.Kind() == token.Ident {
328+
// However, this will cause
329+
//
330+
// reserved foo, bar baz;
331+
//
332+
// to treat baz as a new declaration, rather than assume a
333+
// missing comma. Distinguishing this case is tricky: the
334+
// cheapest option is to check whether a newline exists between
335+
// this token and the last position passed to parse.
336+
//
337+
// This case will not be hit for valid syntax, so it's ok
338+
// to do 2*O(log n) line lookups.
339+
prev := last.Span().EndLoc()
340+
next := t.Span().StartLoc()
341+
return prev.Line != next.Line
342+
}
343+
344+
return false
345+
},
307346
}.iter(func(expr ast.ExprAny, comma token.Token) bool {
308347
exprs = append(exprs, exprComma{expr, comma})
309348
return true
@@ -344,6 +383,7 @@ func parseTypeList(p *parser, parens token.Token, types ast.TypeList, in taxa.No
344383
ty := parseType(p, c, in.In())
345384
return ty, !ty.IsZero()
346385
},
386+
start: canStartPath,
347387
}.appendTo(types)
348388
}
349389

@@ -398,6 +438,7 @@ func parseOptions(p *parser, brackets token.Token, _ taxa.Noun) ast.CompactOptio
398438
}
399439
return option, !option.Value.IsZero()
400440
},
441+
start: canStartPath,
401442
}.appendTo(options.Entries())
402443

403444
return options

experimental/parser/parse_delimited.go

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525
)
2626

2727
// delimited is a mechanism for parsing a punctuation-delimited list.
28-
type delimited[T any] struct {
28+
type delimited[T report.Spanner] struct {
2929
p *parser
3030
c *token.Cursor
3131

@@ -46,6 +46,12 @@ type delimited[T any] struct {
4646
//
4747
// This function is expected to exhaust
4848
parse func(*token.Cursor) (T, bool)
49+
50+
// Used for skipping tokens until we can begin parsing.
51+
//
52+
// start is called until we see a token that returns true for it. However,
53+
// if stop is not nil and it returns true for that token, parsing stops.
54+
start, stop func(token.Token) bool
4955
}
5056

5157
func (d delimited[T]) appendTo(commas ast.Commas[T]) {
@@ -65,9 +71,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
6571
}
6672

6773
var delim token.Token
74+
var latest int // The index of the most recently seen delimiter.
6875

6976
if next := d.c.Peek(); slices.Contains(d.delims, next.Text()) {
7077
_ = d.c.Pop()
78+
latest = slices.Index(d.delims, next.Text())
7179

7280
d.p.Error(errUnexpected{
7381
what: next,
@@ -77,21 +85,79 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
7785
})
7886
}
7987

88+
var needDelim bool
89+
var mark token.CursorMark
8090
for !d.c.Done() {
91+
ensureProgress(d.c, &mark)
92+
93+
// Set if we should not diagnose a missing comma, because there was
94+
// garbage in front of the call to parse().
95+
var badPrefix bool
96+
if !d.start(d.c.Peek()) {
97+
if d.stop != nil && d.stop(d.c.Peek()) {
98+
break
99+
}
100+
101+
first := d.c.Pop()
102+
var last token.Token
103+
for !d.c.Done() && !d.start(d.c.Peek()) {
104+
if d.stop != nil && d.stop(d.c.Peek()) {
105+
break
106+
}
107+
last = d.c.Pop()
108+
}
109+
110+
want := d.what.AsSet()
111+
if needDelim && delim.IsZero() {
112+
want = d.delimNouns()
113+
}
114+
115+
what := report.Spanner(first)
116+
if !last.IsZero() {
117+
what = report.Join(first, last)
118+
}
119+
120+
badPrefix = true
121+
d.p.Error(errUnexpected{
122+
what: what,
123+
where: d.in.In(),
124+
want: want,
125+
})
126+
}
127+
81128
v, ok := d.parse(d.c)
82129
if !ok {
83130
break
84131
}
85132

133+
if !badPrefix && needDelim && delim.IsZero() {
134+
d.p.Error(errUnexpected{
135+
what: v,
136+
where: d.in.In(),
137+
want: d.delimNouns(),
138+
}).Apply(
139+
// TODO: this should be a suggestion.
140+
report.Snippetf(v.Span().Rune(0), "note: assuming a missing `%s` here", d.delims[latest]),
141+
)
142+
}
143+
needDelim = d.required
144+
86145
// Pop as many delimiters as we can.
87146
delim = token.Zero
88-
for slices.Contains(d.delims, d.c.Peek().Text()) {
147+
for {
148+
which := slices.Index(d.delims, d.c.Peek().Text())
149+
if which < 0 {
150+
break
151+
}
152+
latest = which
153+
89154
next := d.c.Pop()
90155
if delim.IsZero() {
91156
delim = next
92157
continue
93158
}
94159

160+
// Diagnose all extra delimiters after the first.
95161
d.p.Error(errUnexpected{
96162
what: next,
97163
where: d.in.In(),
@@ -100,9 +166,18 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
100166
}).Apply(report.Snippetf(delim, "first delimiter is here"))
101167
}
102168

103-
if !yield(v, delim) || (d.required && delim.IsZero()) {
169+
if !yield(v, delim) {
104170
break
105171
}
172+
173+
// In non-exhaust mode, if we miss a required comma, bail if we have
174+
// reached a stop token, or if we don't have a stop predicate.
175+
// Otherwise, go again to parse another thing.
176+
if delim.IsZero() && d.required && !d.exhaust {
177+
if d.stop == nil || d.stop(d.c.Peek()) {
178+
break
179+
}
180+
}
106181
}
107182

108183
switch {
@@ -121,3 +196,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
121196
})
122197
}
123198
}
199+
200+
func (d delimited[T]) delimNouns() taxa.Set {
201+
var set taxa.Set
202+
for _, delim := range d.delims {
203+
set = set.With(taxa.Punct(delim, false))
204+
}
205+
return set
206+
}

experimental/parser/parse_expr.go

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -60,31 +60,40 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn
6060
}).AsAny()
6161

6262
case "{", "<", "[": // This is for colon-less, array or dict-valued fields.
63-
if !next.IsLeaf() && lhs.Kind() != ast.ExprKindField {
64-
// The previous expression cannot also be a key-value pair, since
65-
// this messes with parsing of dicts, which are not comma-separated.
66-
//
67-
// In other words, consider the following, inside of an expression
68-
// context:
63+
if next.IsLeaf() {
64+
break
65+
}
66+
67+
// The previous expression cannot also be a key-value pair, since
68+
// this messes with parsing of dicts, which are not comma-separated.
69+
//
70+
// In other words, consider the following, inside of an expression
71+
// context:
72+
//
73+
// foo: bar { ... }
74+
//
75+
// We want to diagnose the { as unexpected here, and it is better
76+
// for that to be done by whatever is calling parseExpr since it
77+
// will have more context.
78+
//
79+
// We also do not allow this inside of arrays, because we want
80+
// [a {}] to parse as [a, {}] not [a: {}].
81+
if lhs.Kind() == ast.ExprKindField || where.Subject() == taxa.Array {
82+
break
83+
}
84+
85+
return p.NewExprField(ast.ExprFieldArgs{
86+
Key: lhs,
87+
// Why not call parseExprSolo? Suppose the following
88+
// (invalid) production:
6989
//
70-
// foo: bar { ... }
90+
// foo { ... } to { ... }
7191
//
72-
// We want to diagnose the { as unexpected here, and it is better
73-
// for that to be done by whatever is calling parseExpr since it
74-
// will have more context.
75-
return p.NewExprField(ast.ExprFieldArgs{
76-
Key: lhs,
77-
// Why not call parseExprSolo? Suppose the following
78-
// (invalid) production:
79-
//
80-
// foo { ... } to { ... }
81-
//
82-
// Calling parseExprInfix will cause this to be parsed
83-
// as a range expression, which will be diagnosed when
84-
// we legalize.
85-
Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1),
86-
}).AsAny()
87-
}
92+
// Calling parseExprInfix will cause this to be parsed
93+
// as a range expression, which will be diagnosed when
94+
// we legalize.
95+
Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1),
96+
}).AsAny()
8897
}
8998
}
9099

@@ -157,7 +166,7 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny {
157166
elems := delimited[ast.ExprAny]{
158167
p: p,
159168
c: body.Children(),
160-
what: taxa.Expr,
169+
what: taxa.DictField,
161170
in: in,
162171

163172
delims: []string{",", ";"},
@@ -168,9 +177,15 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny {
168177
expr := parseExpr(p, c, in.In())
169178
return expr, !expr.IsZero()
170179
},
180+
start: canStartExpr,
171181
}
172182

173183
if next.Text() == "[" {
184+
elems.what = taxa.Expr
185+
elems.delims = []string{","}
186+
elems.required = true
187+
elems.trailing = false
188+
174189
array := p.NewExprArray(body)
175190
elems.appendTo(array.Elements())
176191
return array.AsAny()

experimental/parser/parse_type.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ func parseTypeImpl(p *parser, c *token.Cursor, where taxa.Place, pathAfter bool)
185185
ty := parseType(p, c, taxa.TypeParams.In())
186186
return ty, !ty.IsZero()
187187
},
188+
start: canStartPath,
188189
}.appendTo(generic.Args())
189190

190191
ty = generic.AsAny()

0 commit comments

Comments
 (0)