Skip to content

Commit 58ce261

Browse files
authored
BNF generator - Support for rule parameters (#1874)
* BNF generator - Support for rule parameters * Fixed lodash import. Applied review suggestion.
1 parent 95fd0c2 commit 58ce261

File tree

3 files changed

+167
-15
lines changed

3 files changed

+167
-15
lines changed

packages/langium-cli/langium-config-schema.json

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,35 @@
135135
},
136136
"chevrotainParserConfig": {
137137
"$ref": "#/$defs/chevrotainParserConfig"
138+
},
139+
"bnf": {
140+
"description": "An object describing the BNF generator properties.",
141+
"type": "object",
142+
"properties": {
143+
"out": {
144+
"description": "The output path for the BNF file.",
145+
"type": "string"
146+
},
147+
"comment": {
148+
"description": "By default, comments are generated according to the dialect. GBNF uses `#`, EBNF uses `(* *)`. Use this option to force a specific comment style. Use `parentheses` for `(* comment *)`, `slash` for `/* comment *\/`, `hash` for `# comment` and `skip` to disable comment generation.",
149+
"type": {
150+
"enum": [
151+
"skip", "parentheses", "slash", "hash"
152+
]
153+
}
154+
},
155+
"dialect": {
156+
"description": "Dialect of the generated BNF file. GBNF is the default. In EBNF, RegEx terminals are not supported.",
157+
"type": {
158+
"enum": [
159+
"GBNF", "EBNF"
160+
]
161+
}
162+
}
163+
},
164+
"required": [
165+
"out"
166+
]
138167
}
139168
},
140169
"required": [

packages/langium-cli/src/generator/bnf-generator.ts

Lines changed: 102 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,37 @@
66

77
import { CstUtils, type Grammar } from 'langium';
88
import { EOL } from 'langium/generate';
9-
import * as _ from 'lodash';
10-
import type { AbstractElement, AbstractRule, TerminalRule } from '../../../langium/lib/languages/generated/ast.js';
9+
import _ from 'lodash';
10+
import type { AbstractElement, AbstractRule, Condition, NamedArgument, Parameter } from '../../../langium/lib/languages/generated/ast.js';
1111
import {
12-
isAction, isAlternatives, isAssignment, isCrossReference, isGroup, isKeyword, isParserRule, isRegexToken,
12+
isAction, isAlternatives, isAssignment,
13+
isCrossReference, isGroup, isKeyword,
14+
isParserRule, isRegexToken,
1315
isRuleCall, isTerminalAlternatives, isTerminalGroup, isTerminalRule, isTerminalRuleCall
1416
} from '../../../langium/lib/languages/generated/ast.js';
1517

1618
export function generateBnf(grammars: Grammar[], options: GeneratorOptions = { dialect: 'GBNF' }): string {
1719
const grammarsWithName = grammars.filter(grammar => !!grammar.name);
1820

21+
const isHiddenTerminalRule = (rule: AbstractRule): boolean => {
22+
return isTerminalRule(rule) && rule.hidden;
23+
};
24+
1925
const ctx: GeneratorContext = {
2026
rootAssigned: options.dialect === 'EBNF',
21-
hasHiddenRules: grammarsWithName.some(grammar => grammar.rules.some(rule => isTerminalRule(rule) && rule.hidden)),
27+
hasHiddenRules: grammarsWithName.some(grammar => grammar.rules.some(isHiddenTerminalRule)),
2228
dialect: options.dialect,
2329
commentStyle: options.commentStyle ?? (options.dialect === 'GBNF' ? 'hash' : 'parentheses')
2430
};
2531

26-
const hiddenRules: TerminalRule[] = [];
32+
const hiddenRules: AbstractRule[] = [];
2733

2834
let result: string = '';
2935
grammarsWithName.forEach(grammar => {
3036
grammar.rules.forEach(rule => {
3137
result += processRule(rule, ctx);
3238
result += EOL + EOL;
33-
if (ctx.hasHiddenRules && isTerminalRule(rule) && rule.hidden) {
39+
if (isHiddenTerminalRule(rule)) {
3440
hiddenRules.push(rule);
3541
}
3642
});
@@ -49,10 +55,23 @@ function processRule(rule: AbstractRule, ctx: GeneratorContext): string {
4955
}
5056

5157
// GBNF expects 'root' as the root rule name, Lark e.g. expects 'start'.
52-
const ruleName = processName(markRoot ? 'root' : rule.name, ctx);
5358
const ruleComment = processComment(rule, ctx);
5459
const hiddenPrefix = (isTerminalRule(rule) && !rule.hidden) ? hiddenRuleCall(ctx) : '';
55-
return `${ruleComment}${ruleName} ::= ${hiddenPrefix}${processElement(rule.definition, ctx)}`;
60+
const ruleName = markRoot ? 'root' : rule.name;
61+
if (isParserRule(rule) && rule.parameters.length > 0) {
62+
// parser rule with parameters
63+
const variations: Array<Record<string, boolean>> = parserRuleVariations(rule.parameters);
64+
let content = '';
65+
variations.forEach((variation, idx) => {
66+
const variationCtx = { ...ctx, parserRuleVariation: variation };
67+
content += `${ruleComment}${processName(ruleName, variationCtx, variation)} ::= ${hiddenPrefix}${processElement(rule.definition, variationCtx)}`;
68+
if (idx < variations.length - 1) {
69+
content += EOL;
70+
}
71+
});
72+
return content;
73+
}
74+
return `${ruleComment}${processName(ruleName, ctx)} ::= ${hiddenPrefix}${processElement(rule.definition, ctx)}`;
5675
}
5776

5877
function processElement(element: AbstractElement, ctx: GeneratorContext): string {
@@ -62,17 +81,27 @@ function processElement(element: AbstractElement, ctx: GeneratorContext): string
6281
if (isKeyword(element)) {
6382
return `${hiddenRuleCall(ctx)}"${element.value}"`;
6483
} else if (isGroup(element) || isTerminalGroup(element)) {
65-
if (element.cardinality) {
66-
return `( ${element.elements.map(processRecursively).filter(notEmpty).join(' ')} )${processCardinality(element)}`;
67-
} else {
68-
return element.elements.map(processRecursively).filter(notEmpty).join(' ');
84+
if (isGroup(element) && element.guardCondition && !evaluateCondition(element.guardCondition, ctx)) {
85+
// Skip group if guard condition is false
86+
return ' ';
87+
}
88+
const content = element.elements.map(processRecursively).filter(notEmpty).join(' ');
89+
if (element.cardinality && notEmpty(content)) {
90+
return `( ${content} )${processCardinality(element)}`;
6991
}
92+
return content;
7093
} else if (isAssignment(element)) {
7194
return processRecursively(element.terminal) + processCardinality(element);
7295
} else if (isRuleCall(element) || isTerminalRuleCall(element)) {
73-
return processName(element.rule.ref?.name ?? element.rule.$refText, ctx) + processCardinality(element);
96+
const variation = isRuleCall(element) ? collectArguments(element.rule.ref, element.arguments, ctx) : undefined;
97+
const ruleName = element.rule.ref?.name ?? element.rule.$refText;
98+
return processName(ruleName, ctx, variation) + processCardinality(element);
7499
} else if (isAlternatives(element) || isTerminalAlternatives(element)) {
75-
return '(' + element.elements.map(processRecursively).filter(notEmpty).join(' | ') + ')' + processCardinality(element);
100+
const content = element.elements.map(processRecursively).filter(notEmpty).join(' | ');
101+
if (notEmpty(content)) {
102+
return '(' + content + ')' + processCardinality(element);
103+
}
104+
return '';
76105
} else if (isRegexToken(element)) {
77106
// First remove trailing and leading slashes. Replace escaped slashes `\/` with unescaped slashes `/`.
78107
return element.regex.replace(/(^|[^\\])\//g, (_, p1) => p1 + '').replace(/\\\//g, '/');
@@ -89,7 +118,12 @@ function processCardinality(element: AbstractElement): string {
89118
return element.cardinality ?? '';
90119
}
91120

92-
function processName(name: string, ctx: GeneratorContext): string {
121+
function processName(ruleName: string, ctx: GeneratorContext, parserRuleVariation?: Record<string, boolean>): string {
122+
const name = parserRuleVariation
123+
? `${ruleName}${Object.entries(parserRuleVariation)
124+
.filter(entry => entry[1]).map(entry => entry[0].charAt(0).toUpperCase() + entry[0].slice(1))
125+
.join('')}`
126+
: ruleName;
93127
switch (ctx.dialect) {
94128
case 'GBNF':
95129
// convert camel case to Kebab Case for GBNF (GGML AI)
@@ -134,6 +168,57 @@ function notEmpty(text: string): boolean {
134168
return text.trim().length > 0;
135169
}
136170

171+
/**
 * Enumerates every truth assignment of the given parser rule parameters.
 *
 * The running counter `i` is read as a binary number: digit `index` of `i` decides the value
 * assigned to `params[index]`. Consequently the first entry maps every parameter to `false`
 * and the last entry maps every parameter to `true`.
 *
 * @param params parser rule parameters
 * @returns all possible combinations of guard values for the parser rule - 2^params.length entries
 */
function parserRuleVariations(params: Parameter[]): Array<Record<string, boolean>> {
    const variationsCount = Math.pow(2, params.length);
    const variations: Array<Record<string, boolean>> = [];
    for (let i = 0; i < variationsCount; i++) {
        const variation: Record<string, boolean> = {};
        params.forEach((param, index) => {
            // Extract binary digit `index` of `i` arithmetically; unlike `1 << index` this
            // does not wrap around at 32 bits and needs no bitwise lint suppression.
            variation[param.name] = Math.floor(i / Math.pow(2, index)) % 2 === 1;
        });
        variations.push(variation);
    }
    return variations;
}
189+
190+
/**
 * Resolves the guard values a rule call passes to the rule it references.
 *
 * Arguments are matched to the rule's parameters by position, and each argument's condition
 * is evaluated against the given context.
 *
 * @param rule the referenced rule; anything other than a parser rule yields `undefined`
 * @param namedArgs the arguments given at the rule call
 * @param ctx generator context used to evaluate the argument conditions
 * @returns a map from parameter name to its evaluated value, or `undefined` if there are no
 *          arguments or their count differs from the rule's parameter count
 */
function collectArguments(rule: AbstractRule | undefined, namedArgs: NamedArgument[], ctx: GeneratorContext): Record<string, boolean> | undefined {
    if (!isParserRule(rule)) {
        return undefined;
    }
    const parameters = rule.parameters;
    const argumentCount = namedArgs.length;
    if (argumentCount === 0 || parameters.length !== argumentCount) {
        return undefined;
    }
    const resolved: Record<string, boolean> = {};
    for (let position = 0; position < argumentCount; position++) {
        resolved[parameters[position].name] = evaluateCondition(namedArgs[position].value, ctx);
    }
    return resolved;
}
200+
201+
/**
 * Evaluates a guard condition to a boolean for the rule variation stored in the context.
 *
 * @param condition the condition to evaluate
 * @param ctx generator context; `ctx.parserRuleVariation` supplies the parameter values
 * @returns the value of the condition; a parameter reference evaluates to `false` when the
 *          context has no variation or the variation has no entry for that parameter
 * @throws Error if the condition has a `$type` that is not handled here
 */
function evaluateCondition(condition: Condition, ctx: GeneratorContext): boolean {
    switch (condition.$type) {
        case 'BooleanLiteral':
            return condition.true;
        case 'Conjunction':
            return evaluateCondition(condition.left, ctx) && evaluateCondition(condition.right, ctx);
        case 'Disjunction':
            return evaluateCondition(condition.left, ctx) || evaluateCondition(condition.right, ctx);
        case 'Negation':
            return !evaluateCondition(condition.value, ctx);
        case 'ParameterReference': {
            // Prefer the resolved parameter name; fall back to the reference text if unresolved.
            const parameterName = condition.parameter.ref?.name ?? condition.parameter.$refText;
            // Compare against `true` so that a missing entry (or an inherited Object.prototype
            // member such as `constructor`) yields `false` instead of leaking a non-boolean.
            return ctx.parserRuleVariation?.[parameterName] === true;
        }
        default:
            throw new Error(`Unhandled Condition type: ${(condition as Condition).$type}`);
    }
}
221+
137222
/**
138223
* Default: GBNF
139224
* EBNF doesn't support RegEx terminal rules.
@@ -157,4 +242,6 @@ type GeneratorContext = GeneratorOptions & {
157242
rootAssigned: boolean;
158243
hasHiddenRules: boolean;
159244
commentStyle: CommentStyle;
245+
parserRuleVariation?: Record<string, boolean>;
160246
};
247+

packages/langium-cli/test/generator/bnf-generator.test.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,42 @@ describe('BNF generator', () => {
172172
`;
173173
expect(generated).toBe(expected);
174174
});
175+
176+
test('Parser Rule parameter', async () => {
177+
const grammarContent = expandToStringWithNL`
178+
grammar Test
179+
entry Model:
180+
element1=Element<false, false>
181+
element2=Element<false, true>
182+
element3=Element<true, false>
183+
element4=Element<true, true>
184+
;
185+
Element<a,b>:
186+
(<a> "a")
187+
(<!a> "!a")
188+
(<a&b> "ab")
189+
(<a|b> "a_b")
190+
(<!(a&b)> '!ab')
191+
(<a> "a" | <b> "b")
192+
name="name"
193+
elements+=Element<false | a, b & true>*
194+
';';
195+
`;
196+
197+
const result = await parseGrammar(grammarContent);
198+
const generated = generateBnf([result.value]);
199+
const expected = expandToStringWithNL`
200+
root ::= element element-b element-a element-ab
201+
202+
element ::= "!a" "!ab" "name" element* ";"
203+
element-a ::= "a" "a_b" "!ab" ("a") "name" element-a* ";"
204+
element-b ::= "!a" "a_b" "!ab" ("b") "name" element-b* ";"
205+
element-ab ::= "a" "ab" "a_b" ("a" | "b") "name" element-ab* ";"
206+
207+
`;
208+
expect(generated).toBe(expected);
209+
});
210+
175211
});
176212

177213
const EXPECTED_BNF = expandToStringWithNL`

0 commit comments

Comments
 (0)