Skip to content

Commit d4a60c0

Browse files
authored
Improve chevrotain parser error recovery (#1822)
1 parent de4607c commit d4a60c0

File tree

5 files changed

+88
-40
lines changed

5 files changed

+88
-40
lines changed

packages/langium/src/grammar/generated/grammar.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ export const LangiumGrammarGrammar = (): Grammar => loadedLangiumGrammarGrammar
234234
}
235235
}
236236
],
237-
"cardinality": "+"
237+
"cardinality": "*"
238238
}
239239
]
240240
},

packages/langium/src/grammar/langium-grammar.langium

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ entry Grammar:
1212
(definesHiddenTokens?='hidden' '(' (hiddenTokens+=[AbstractRule:ID] (',' hiddenTokens+=[AbstractRule:ID])*)? ')')?
1313
)?
1414
imports+=GrammarImport*
15-
(rules+=AbstractRule | interfaces+=Interface | types+=Type)+;
15+
(rules+=AbstractRule | interfaces+=Interface | types+=Type)*;
1616

1717
Interface:
1818
'interface' name=ID

packages/langium/src/parser/langium-parser.ts

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
******************************************************************************/
66

77
/* eslint-disable @typescript-eslint/no-explicit-any */
8-
import type { DSLMethodOpts, ILexingError, IOrAlt, IParserErrorMessageProvider, IRecognitionException, IToken, TokenType, TokenVocabulary } from 'chevrotain';
8+
import type { DSLMethodOpts, ILexingError, IOrAlt, IParserErrorMessageProvider, IRecognitionException, IRuleConfig, IToken, TokenType, TokenVocabulary } from 'chevrotain';
99
import type { AbstractElement, Action, Assignment, ParserRule } from '../languages/generated/ast.js';
1010
import type { Linker } from '../references/linker.js';
1111
import type { LangiumCoreServices } from '../services.js';
@@ -99,10 +99,6 @@ export interface BaseParser {
9999
* Executes a grammar action that modifies the currently active AST node
100100
*/
101101
action($type: string, action: Action): void;
102-
/**
103-
* Finishes construction of the current AST node. Only used by the AST parser.
104-
*/
105-
construct(): unknown;
106102
/**
107103
* Whether the parser is currently actually in use or in "recording mode".
108104
* Recording mode is activated once when the parser is analyzing itself.
@@ -163,7 +159,6 @@ export abstract class AbstractLangiumParser implements BaseParser {
163159
abstract consume(idx: number, tokenType: TokenType, feature: AbstractElement): void;
164160
abstract subrule(idx: number, rule: RuleResult, fragment: boolean, feature: AbstractElement, args: Args): void;
165161
abstract action($type: string, action: Action): void;
166-
abstract construct(): unknown;
167162

168163
getRule(name: string): RuleResult | undefined {
169164
return this.allRules.get(name);
@@ -239,7 +234,7 @@ export class LangiumParser extends AbstractLangiumParser {
239234
if (!ruleMethod) {
240235
throw new Error(options.rule ? `No rule found with name '${options.rule}'` : 'No main rule available.');
241236
}
242-
const result = ruleMethod.call(this.wrapper, {});
237+
const result = this.doParse(ruleMethod);
243238
this.nodeBuilder.addHiddenNodes(lexerResult.hidden);
244239
this.unorderedGroups.clear();
245240
this.lexerResult = undefined;
@@ -251,6 +246,22 @@ export class LangiumParser extends AbstractLangiumParser {
251246
};
252247
}
253248

249+
private doParse(rule: RuleResult): any {
250+
let result = rule.call(this.wrapper, {});
251+
if (this.stack.length > 0) {
252+
// In case the parser throws on the entry rule, `construct` is not called
253+
// We need to call it manually here
254+
result = this.construct();
255+
}
256+
// Perform some sanity checking
257+
if (result === undefined) {
258+
throw new Error('No result from parser');
259+
} else if (this.stack.length > 0) {
260+
throw new Error('Parser stack is not empty after parsing');
261+
}
262+
return result;
263+
}
264+
254265
private startImplementation($type: string | symbol | undefined, implementation: RuleImpl): RuleImpl {
255266
return (args) => {
256267
// Only create a new AST node in case the calling rule is not a fragment rule
@@ -262,16 +273,12 @@ export class LangiumParser extends AbstractLangiumParser {
262273
node.value = '';
263274
}
264275
}
265-
let result: unknown;
266-
try {
267-
result = implementation(args);
268-
} catch (err) {
269-
result = undefined;
270-
}
271-
if (result === undefined && createNode) {
272-
result = this.construct();
273-
}
274-
return result;
276+
// Execute the actual rule implementation
277+
// The `implementation` never returns anything and only manipulates the parser state.
278+
implementation(args);
279+
// Once the rule implementation is done, we need to construct the AST node
280+
// If the implementation throws (likely a recognition error), we relay the construction to the `subrule` method
281+
return createNode ? this.construct() : undefined;
275282
};
276283
}
277284

@@ -293,6 +300,10 @@ export class LangiumParser extends AbstractLangiumParser {
293300
consume(idx: number, tokenType: TokenType, feature: AbstractElement): void {
294301
const token = this.wrapper.wrapConsume(idx, tokenType);
295302
if (!this.isRecording() && this.isValidToken(token)) {
303+
// Before inserting the current token into the CST, we want add the hidden tokens (i.e. comments)
304+
// These are located directly before the current token, but are not part of the token stream.
305+
// Adding the hidden tokens to the CST requires searching through the CST and finding the correct position.
306+
// Performing this work here is more efficient than doing it later on.
296307
const hiddenTokens = this.extractHiddenTokens(token);
297308
this.nodeBuilder.addHiddenNodes(hiddenTokens);
298309
const leafNode = this.nodeBuilder.buildLeafNode(token, feature);
@@ -330,9 +341,25 @@ export class LangiumParser extends AbstractLangiumParser {
330341
// This is intended, as fragment rules only enrich the current AST node
331342
cstNode = this.nodeBuilder.buildCompositeNode(feature);
332343
}
333-
const subruleResult = this.wrapper.wrapSubrule(idx, rule, args) as any;
334-
if (!this.isRecording() && cstNode && cstNode.length > 0) {
335-
this.performSubruleAssignment(subruleResult, feature, cstNode);
344+
let result: any;
345+
try {
346+
result = this.wrapper.wrapSubrule(idx, rule, args);
347+
} finally {
348+
if (!this.isRecording()) {
349+
// Calling `subrule` on chevrotain parsers can result in a recognition error
350+
// This likely means that we encounter a syntax error in the input.
351+
// In this case, the result of the subrule is `undefined` and we need to call `construct` manually.
352+
if (result === undefined && !fragment) {
353+
result = this.construct();
354+
}
355+
// We want to perform the subrule assignment regardless of the recognition error
356+
// But only if the subrule call actually consumed any tokens
357+
if (result !== undefined && cstNode && cstNode.length > 0) {
358+
this.performSubruleAssignment(result, feature, cstNode);
359+
}
360+
}
361+
// We don't have a catch block in here because we want to propagate the recognition error to the caller
362+
// This results in much better error recovery and error messages from chevrotain
336363
}
337364
}
338365

@@ -375,7 +402,7 @@ export class LangiumParser extends AbstractLangiumParser {
375402
}
376403
}
377404

378-
construct(): unknown {
405+
private construct(): unknown {
379406
if (this.isRecording()) {
380407
return undefined;
381408
}
@@ -673,16 +700,16 @@ class ChevrotainWrapper extends EmbeddedActionsParser {
673700
return this.RECORDING_PHASE;
674701
}
675702

676-
DEFINE_RULE(name: string, impl: RuleImpl): RuleResult {
677-
return this.RULE(name, impl);
703+
DEFINE_RULE(name: string, impl: RuleImpl, config?: IRuleConfig<any>): RuleResult {
704+
return this.RULE(name, impl, config);
678705
}
679706

680707
wrapSelfAnalysis(): void {
681708
this.performSelfAnalysis();
682709
}
683710

684711
wrapConsume(idx: number, tokenType: TokenType): IToken {
685-
return this.consume(idx, tokenType);
712+
return this.consume(idx, tokenType, undefined);
686713
}
687714

688715
wrapSubrule(idx: number, rule: RuleResult, args: Args): unknown {

packages/langium/test/parser/langium-parser-builder.test.ts

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ describe('Handle unordered group', () => {
231231
description "New description"
232232
author "foo"
233233
`);
234-
expect(parsed.parserErrors.length).toBe(2);
234+
expect(parsed.parserErrors).toHaveLength(1);
235235
});
236236

237237
test('Should parse multiple instances', () => {
@@ -250,8 +250,7 @@ describe('Handle unordered group', () => {
250250
description "Cool book2"
251251
author "foo2"
252252
253-
`, parser) as { parserErrors?: string | string[], books?: string[] });
254-
expect(lib.parserErrors).toBeUndefined();
253+
`, parser) as { books?: string[] });
255254
expect(lib.books).not.toBeUndefined();
256255
expect(lib.books?.length).toBe(2);
257256
});
@@ -321,7 +320,7 @@ describe('check the default value converter for data type rules using terminal r
321320
const result = parser.parse('123 true abc');
322321
expect(result.lexerErrors.length).toBe(0);
323322
expect(result.parserErrors.length).toBe(0);
324-
const value = result.value as unknown as { propInteger: number, propBoolean: boolean, propString: string };
323+
const value = result.value as AstNode & { propInteger: number, propBoolean: boolean, propString: string };
325324
expect(value.propInteger).not.toBe('123');
326325
expect(value.propInteger).toBe(123);
327326
expect(value.propBoolean).toBe(true);
@@ -343,7 +342,7 @@ describe('Boolean value converter', () => {
343342
});
344343

345344
function expectValue(input: string, value: unknown): void {
346-
const main = parser.parse(input).value as unknown as { value: unknown };
345+
const main = parser.parse(input).value as AstNode & { value: unknown };
347346
expect(main.value).toBe(value);
348347
}
349348

@@ -375,7 +374,7 @@ describe('BigInt Parser value converter', () => {
375374
});
376375

377376
function expectValue(input: string, value: unknown): void {
378-
const main = parser.parse(input).value as unknown as { value: unknown };
377+
const main = parser.parse(input).value as AstNode & { value: unknown };
379378
expect(main.value).toBe(value);
380379
}
381380

@@ -412,7 +411,7 @@ describe('Date Parser value converter', () => {
412411
});
413412

414413
test('Parsed Date is correct Date object', () => {
415-
const parseResult = parser.parse('2022-10-12T00:00').value as unknown as { value: unknown };
414+
const parseResult = parser.parse('2022-10-12T00:00').value as AstNode & { value: unknown };
416415
expect(parseResult.value).toEqual(new Date('2022-10-12T00:00'));
417416
});
418417
});
@@ -451,12 +450,12 @@ describe('Parser calls value converter', () => {
451450
});
452451

453452
function expectValue(input: string, value: unknown): void {
454-
const main = parser.parse(input).value as unknown as { value: unknown };
453+
const main = parser.parse(input).value as AstNode & { value: unknown };
455454
expect(main.value).toBe(value);
456455
}
457456

458457
function expectEqual(input: string, value: unknown): void {
459-
const main = parser.parse(input).value as unknown as { value: unknown };
458+
const main = parser.parse(input).value as AstNode & { value: unknown };
460459
expect(main.value).toEqual(value);
461460
}
462461

@@ -527,7 +526,7 @@ describe('Parser calls value converter', () => {
527526
const result = parser.parse('A');
528527
expect(result.lexerErrors.length).toBe(0);
529528
expect(result.parserErrors.length).toBe(0);
530-
const value = result.value as unknown as { value: string };
529+
const value = result.value as AstNode & { value: string };
531530
expect(value.value).toBeTypeOf('string');
532531
expect(value.value).toBe('A');
533532
});
@@ -547,7 +546,7 @@ describe('Parser calls value converter', () => {
547546
const result = parser.parse('A');
548547
expect(result.lexerErrors.length).toBe(0);
549548
expect(result.parserErrors.length).toBe(0);
550-
const value = result.value as unknown as { value: string };
549+
const value = result.value as AstNode & { value: string };
551550
expect(value.value).toBeTypeOf('string');
552551
expect(value.value).toBe('A');
553552
});
@@ -817,7 +816,7 @@ describe('Parsing default values', () => {
817816

818817
test('Assigns default values for properties', async () => {
819818
const result = parser.parse('hi');
820-
const model = result.value as unknown as {
819+
const model = result.value as AstNode & {
821820
a: string
822821
b: string
823822
c: string[]
@@ -829,7 +828,7 @@ describe('Parsing default values', () => {
829828

830829
test('Does not overwrite parsed value for "b"', async () => {
831830
const result = parser.parse('hi world');
832-
const model = result.value as unknown as {
831+
const model = result.value as AstNode & {
833832
a: string
834833
b: string
835834
c: string[]
@@ -840,7 +839,7 @@ describe('Parsing default values', () => {
840839

841840
test('Does not overwrite parsed value for "c"', async () => {
842841
const result = parser.parse('hi world d e');
843-
const model = result.value as unknown as {
842+
const model = result.value as AstNode & {
844843
a: string
845844
b: string
846845
c: string[]

packages/langium/test/parser/langium-parser.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,28 @@ describe('hidden node parsing', () => {
8989

9090
});
9191

92+
describe('parser error recovery', () => {
93+
94+
for (const [open, close] of [[1, 0], [2, 0], [3, 0], [2, 1], [3, 1], [3, 2]]) {
95+
test(`recovers from lexer error with ${open} open and ${close} close parenthesis`, async () => {
96+
const text = `
97+
grammar Test
98+
entry Model: value=Expr ';';
99+
Expr: '(' Expr ')' | value=ID;
100+
terminal ID: /[_a-zA-Z][\\w_]*/;
101+
hidden terminal WS: /\\s+/;
102+
`;
103+
const services = await createServicesForGrammar({ grammar: text });
104+
const opening = '('.repeat(open);
105+
const closing = ')'.repeat(close);
106+
const result = services.parser.LangiumParser.parse(`${opening}a${closing};`);
107+
// Expect only one parser error independent of the number of missing closing parenthesis
108+
expect(result.parserErrors).toHaveLength(1);
109+
});
110+
}
111+
112+
});
113+
92114
interface A extends AstNode {
93115
name: string
94116
}

0 commit comments

Comments
 (0)