Skip to content

Commit 60934de

Browse files
authored
Support terminal lookbehind (#1356)
1 parent 9b7f05f commit 60934de

File tree

6 files changed

+60
-4
lines changed

6 files changed

+60
-4
lines changed

packages/langium/src/grammar/generated/grammar.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3430,6 +3430,14 @@ export const LangiumGrammarGrammar = (): Grammar => loadedLangiumGrammarGrammar
34303430
{
34313431
"$type": "Keyword",
34323432
"value": "?!"
3433+
},
3434+
{
3435+
"$type": "Keyword",
3436+
"value": "?<="
3437+
},
3438+
{
3439+
"$type": "Keyword",
3440+
"value": "?<!"
34333441
}
34343442
]
34353443
},

packages/langium/src/grammar/langium-grammar.langium

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ TerminalTokenElement infers AbstractElement:
198198
CharacterRange | TerminalRuleCall | ParenthesizedTerminalElement | NegatedToken | UntilToken | RegexToken | Wildcard;
199199

200200
ParenthesizedTerminalElement infers AbstractElement:
201-
'(' (lookahead=('?='|'?!'))? TerminalAlternatives ')';
201+
'(' (lookahead=('?='|'?!'|'?<='|'?<!'))? TerminalAlternatives ')';
202202

203203
TerminalRuleCall infers AbstractElement:
204204
{infer TerminalRuleCall} rule=[TerminalRule:ID];

packages/langium/src/languages/generated/ast.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ export function isValueLiteral(item: unknown): item is ValueLiteral {
7272
export interface AbstractElement extends AstNode {
7373
readonly $type: 'AbstractElement' | 'Action' | 'Alternatives' | 'Assignment' | 'CharacterRange' | 'CrossReference' | 'EndOfFile' | 'Group' | 'Keyword' | 'NegatedToken' | 'RegexToken' | 'RuleCall' | 'TerminalAlternatives' | 'TerminalGroup' | 'TerminalRuleCall' | 'UnorderedGroup' | 'UntilToken' | 'Wildcard';
7474
cardinality?: '*' | '+' | '?';
75-
lookahead?: '?!' | '?=';
75+
lookahead?: '?!' | '?<!' | '?<=' | '?=';
7676
}
7777

7878
export const AbstractElement = 'AbstractElement';

packages/langium/src/parser/token-builder.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ export class DefaultTokenBuilder implements TokenBuilder {
4949

5050
protected buildTerminalToken(terminal: TerminalRule): TokenType {
5151
const regex = terminalRegex(terminal);
52-
const pattern = regex.flags.includes('u') ? this.regexPatternFunction(regex) : regex;
52+
const pattern = this.requiresCustomPattern(regex) ? this.regexPatternFunction(regex) : regex;
5353
const tokenType: TokenType = {
5454
name: terminal.name,
5555
PATTERN: pattern,
@@ -62,6 +62,18 @@ export class DefaultTokenBuilder implements TokenBuilder {
6262
return tokenType;
6363
}
6464

65+
/**
 * Decides whether a terminal's regular expression has to be wrapped in a
 * custom pattern function rather than being used directly as the token pattern.
 * `buildTerminalToken` uses the result to choose between
 * `this.regexPatternFunction(regex)` and the plain `regex`.
 *
 * @param regex The regular expression computed for a terminal rule.
 * @returns `true` if the regex has the `u` flag or if its source contains
 * `?<=` or `?<!`; `false` otherwise.
 */
protected requiresCustomPattern(regex: RegExp): boolean {
66+
if (regex.flags.includes('u')) {
67+
// Unicode regexes are not supported by Chevrotain.
68+
return true;
69+
} else if (regex.source.includes('?<=') || regex.source.includes('?<!')) {
70+
// Negative and positive lookbehind are not supported by Chevrotain yet.
// NOTE(review): this is a plain substring test on the regex source. It does not
// distinguish a real lookbehind group from the same characters appearing
// escaped or inside a character class; in that case the result is still `true`.
71+
return true;
72+
} else {
73+
return false;
74+
}
75+
}
76+
6577
protected regexPatternFunction(regex: RegExp): CustomPatternMatcherFunc {
6678
const stickyRegex = new RegExp(regex, regex.flags + 'y');
6779
return (text, offset) => {

packages/langium/test/grammar/grammar-util.test.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,18 @@ describe('TerminalRule to regex', () => {
170170
expect(regex).toEqual(/(a(?!b))/);
171171
});
172172

173+
// Checks that a `(?<!'b')` group in a terminal definition is converted by
// `terminalRegex` into the regex negative lookbehind group `(?<!b)`.
test('Should create negative lookbehind group', async () => {
174+
const terminal = await getTerminal("terminal X: 'a' (?<!'b');");
175+
const regex = terminalRegex(terminal);
176+
expect(regex).toEqual(/(a(?<!b))/);
177+
});
178+
179+
// Checks that a `(?<='b')` group in a terminal definition is converted by
// `terminalRegex` into the regex positive lookbehind group `(?<=b)`.
test('Should create positive lookbehind group', async () => {
180+
const terminal = await getTerminal("terminal X: 'a' (?<='b');");
181+
const regex = terminalRegex(terminal);
182+
expect(regex).toEqual(/(a(?<=b))/);
183+
});
184+
173185
test('Should create terminal reference in terminal definition', async () => {
174186
const terminal = await getTerminal(`
175187
terminal X: Y Y;

packages/langium/test/parser/langium-parser-builder.test.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
******************************************************************************/
66

77
import type { TokenType, TokenVocabulary } from 'chevrotain';
8-
import type { AstNode, CstNode, GenericAstNode, Grammar, GrammarAST, LangiumParser, TokenBuilderOptions } from 'langium';
8+
import type { AstNode, CstNode, GenericAstNode, Grammar, GrammarAST, LangiumParser, ParseResult, TokenBuilderOptions } from 'langium';
99
import { EmptyFileSystem, DefaultTokenBuilder } from 'langium';
1010
import { describe, expect, test, onTestFailed, beforeEach } from 'vitest';
1111
import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar';
@@ -852,7 +852,31 @@ describe('Unassigned data type rules', () => {
852852
expect(parseResult.lexerErrors).toHaveLength(0);
853853
expect(parseResult.parserErrors).toHaveLength(0);
854854
});
855+
});
856+
857+
// End-to-end parser tests for terminals that start with a lookbehind group.
// Both cases delegate to the `testLookbehind` helper declared inside this suite.
describe('Parsing with lookbehind tokens', () => {
858+
// Positive lookbehind on 'A': 'AB' is the accepted input, 'CB' the rejected one.
test('Parser Success / Failure with positive lookbehind', async () => {
859+
await testLookbehind(true, 'AB', 'CB');
860+
});
861+
862+
// Negative lookbehind on 'A': 'CB' is the accepted input, 'AB' the rejected one.
test('Parser Success / Failure with negative lookbehind', async () => {
863+
await testLookbehind(false, 'CB', 'AB');
864+
});
855865

866+
/**
 * Creates a parser from a small grammar whose terminal `B` is prefixed with a
 * lookbehind group on 'A', then parses one accepted and one rejected input.
 *
 * @param positive `true` emits `(?<='A')` (positive lookbehind) in the grammar,
 * `false` emits `(?<!'A')` (negative lookbehind).
 * @param success Input whose parse result must have the property `b` equal to 'B'.
 * @param failure Input that must yield exactly one lexer error and one parser error.
 */
async function testLookbehind(positive: boolean, success: string, failure: string): Promise<void> {
867+
const parser = await parserFromGrammar(`
868+
grammar test
869+
entry Main: ('A' | 'C') b=B;
870+
terminal B: (?<${positive ? '=' : '!'}'A')'B';
871+
hidden terminal WS: /\\s+/;
872+
`
873+
);
874+
// The accepted input must produce a node whose `b` property holds the matched 'B'.
const validResult = parser.parse(success) as ParseResult<GenericAstNode>;
875+
expect(validResult.value.b).toEqual('B');
876+
// The rejected input must be reported once by the lexer and once by the parser.
const invalidResult = parser.parse(failure);
877+
expect(invalidResult.lexerErrors).toHaveLength(1);
878+
expect(invalidResult.parserErrors).toHaveLength(1);
879+
}
856880
});
857881

858882
async function parserFromGrammar(grammar: string): Promise<LangiumParser> {

0 commit comments

Comments
 (0)