55 ******************************************************************************/
66
77/* eslint-disable @typescript-eslint/no-explicit-any */
8- import type { DSLMethodOpts , ILexingError , IOrAlt , IParserErrorMessageProvider , IRecognitionException , IToken , TokenType , TokenVocabulary } from 'chevrotain' ;
8+ import type { DSLMethodOpts , ILexingError , IOrAlt , IParserErrorMessageProvider , IRecognitionException , IRuleConfig , IToken , TokenType , TokenVocabulary } from 'chevrotain' ;
99import type { AbstractElement , Action , Assignment , ParserRule } from '../languages/generated/ast.js' ;
1010import type { Linker } from '../references/linker.js' ;
1111import type { LangiumCoreServices } from '../services.js' ;
@@ -99,10 +99,6 @@ export interface BaseParser {
9999 * Executes a grammar action that modifies the currently active AST node
100100 */
101101 action ( $type : string , action : Action ) : void ;
102- /**
103- * Finishes construction of the current AST node. Only used by the AST parser.
104- */
105- construct ( ) : unknown ;
106102 /**
107103 * Whether the parser is currently actually in use or in "recording mode".
108104 * Recording mode is activated once when the parser is analyzing itself.
@@ -163,7 +159,6 @@ export abstract class AbstractLangiumParser implements BaseParser {
163159 abstract consume ( idx : number , tokenType : TokenType , feature : AbstractElement ) : void ;
164160 abstract subrule ( idx : number , rule : RuleResult , fragment : boolean , feature : AbstractElement , args : Args ) : void ;
165161 abstract action ( $type : string , action : Action ) : void ;
166- abstract construct ( ) : unknown ;
167162
168163 getRule ( name : string ) : RuleResult | undefined {
169164 return this . allRules . get ( name ) ;
@@ -239,7 +234,7 @@ export class LangiumParser extends AbstractLangiumParser {
239234 if ( ! ruleMethod ) {
240235 throw new Error ( options . rule ? `No rule found with name '${ options . rule } '` : 'No main rule available.' ) ;
241236 }
242- const result = ruleMethod . call ( this . wrapper , { } ) ;
237+ const result = this . doParse ( ruleMethod ) ;
243238 this . nodeBuilder . addHiddenNodes ( lexerResult . hidden ) ;
244239 this . unorderedGroups . clear ( ) ;
245240 this . lexerResult = undefined ;
@@ -251,6 +246,22 @@ export class LangiumParser extends AbstractLangiumParser {
251246 } ;
252247 }
253248
/**
 * Invokes the given entry rule on the chevrotain wrapper and returns the parse result.
 *
 * @param rule The entry rule to invoke; it is called with the wrapper as `this` and an empty argument object.
 * @returns The value returned by the rule, or the value returned by `construct` if the
 *          node stack was still populated after the rule call.
 * @throws Error if no result is available, or if the node stack is still non-empty afterwards.
 */
private doParse(rule: RuleResult): any {
    const ruleResult = rule.call(this.wrapper, {});
    // If the parser throws on the entry rule, `construct` has not been called for it yet.
    // A non-empty stack signals that situation, so `construct` is invoked manually here.
    const result = this.stack.length > 0 ? this.construct() : ruleResult;
    // Sanity checks: a result must exist and nothing may be left on the stack.
    if (result === undefined) {
        throw new Error('No result from parser');
    }
    if (this.stack.length > 0) {
        throw new Error('Parser stack is not empty after parsing');
    }
    return result;
}
264+
254265 private startImplementation ( $type : string | symbol | undefined , implementation : RuleImpl ) : RuleImpl {
255266 return ( args ) => {
256267 // Only create a new AST node in case the calling rule is not a fragment rule
@@ -262,16 +273,12 @@ export class LangiumParser extends AbstractLangiumParser {
262273 node . value = '' ;
263274 }
264275 }
265- let result : unknown ;
266- try {
267- result = implementation ( args ) ;
268- } catch ( err ) {
269- result = undefined ;
270- }
271- if ( result === undefined && createNode ) {
272- result = this . construct ( ) ;
273- }
274- return result ;
276+ // Execute the actual rule implementation
277+ // The `implementation` never returns anything and only manipulates the parser state.
278+ implementation ( args ) ;
279+ // Once the rule implementation is done, we need to construct the AST node
280+ // If the implementation throws (likely a recognition error), the construction is left to the `subrule` method
281+ return createNode ? this . construct ( ) : undefined ;
275282 } ;
276283 }
277284
@@ -293,6 +300,10 @@ export class LangiumParser extends AbstractLangiumParser {
293300 consume ( idx : number , tokenType : TokenType , feature : AbstractElement ) : void {
294301 const token = this . wrapper . wrapConsume ( idx , tokenType ) ;
295302 if ( ! this . isRecording ( ) && this . isValidToken ( token ) ) {
303+ // Before inserting the current token into the CST, we want to add the hidden tokens (i.e. comments)
304+ // These are located directly before the current token, but are not part of the token stream.
305+ // Adding the hidden tokens to the CST requires searching through the CST and finding the correct position.
306+ // Performing this work here is more efficient than doing it later on.
296307 const hiddenTokens = this . extractHiddenTokens ( token ) ;
297308 this . nodeBuilder . addHiddenNodes ( hiddenTokens ) ;
298309 const leafNode = this . nodeBuilder . buildLeafNode ( token , feature ) ;
@@ -330,9 +341,25 @@ export class LangiumParser extends AbstractLangiumParser {
330341 // This is intended, as fragment rules only enrich the current AST node
331342 cstNode = this . nodeBuilder . buildCompositeNode ( feature ) ;
332343 }
333- const subruleResult = this . wrapper . wrapSubrule ( idx , rule , args ) as any ;
334- if ( ! this . isRecording ( ) && cstNode && cstNode . length > 0 ) {
335- this . performSubruleAssignment ( subruleResult , feature , cstNode ) ;
344+ let result : any ;
345+ try {
346+ result = this . wrapper . wrapSubrule ( idx , rule , args ) ;
347+ } finally {
348+ if ( ! this . isRecording ( ) ) {
349+ // Calling `subrule` on chevrotain parsers can result in a recognition error
350+ // This likely means that we encounter a syntax error in the input.
351+ // In this case, the result of the subrule is `undefined` and we need to call `construct` manually.
352+ if ( result === undefined && ! fragment ) {
353+ result = this . construct ( ) ;
354+ }
355+ // We want to perform the subrule assignment regardless of the recognition error
356+ // But only if the subrule call actually consumed any tokens
357+ if ( result !== undefined && cstNode && cstNode . length > 0 ) {
358+ this . performSubruleAssignment ( result , feature , cstNode ) ;
359+ }
360+ }
361+ // We don't have a catch block in here because we want to propagate the recognition error to the caller
362+ // This results in much better error recovery and error messages from chevrotain
336363 }
337364 }
338365
@@ -375,7 +402,7 @@ export class LangiumParser extends AbstractLangiumParser {
375402 }
376403 }
377404
378- construct ( ) : unknown {
405+ private construct ( ) : unknown {
379406 if ( this . isRecording ( ) ) {
380407 return undefined ;
381408 }
@@ -673,16 +700,16 @@ class ChevrotainWrapper extends EmbeddedActionsParser {
673700 return this . RECORDING_PHASE ;
674701 }
675702
/**
 * Registers a grammar rule by forwarding to this parser's `RULE` method.
 *
 * @param name Name under which the rule is registered.
 * @param impl Implementation of the rule.
 * @param config Optional rule configuration, passed through to `RULE` unchanged.
 *               Callers that omit it keep using the previous two-argument form.
 * @returns The value returned by `RULE`.
 */
DEFINE_RULE(name: string, impl: RuleImpl, config?: IRuleConfig<any>): RuleResult {
    return this.RULE(name, impl, config);
}
679706
/**
 * Public entry point that triggers `performSelfAnalysis` on this wrapper.
 */
wrapSelfAnalysis(): void {
    this.performSelfAnalysis();
}
683710
/**
 * Consumes a token of the given type by forwarding to this parser's `consume` method.
 *
 * @param idx Index forwarded to `consume` as its first argument.
 * @param tokenType Token type forwarded to `consume`.
 * @returns The token returned by `consume`.
 */
wrapConsume(idx: number, tokenType: TokenType): IToken {
    // The third argument is passed explicitly as `undefined`
    // (presumably chevrotain's optional consume options — TODO confirm).
    return this.consume(idx, tokenType, undefined);
}
687714
688715 wrapSubrule ( idx : number , rule : RuleResult , args : Args ) : unknown {
0 commit comments