Skip to content

Commit 081caa0

Browse files
authored
Adds a BNF generator to generate GBNF definition from the Langium grammar (#1860)
GBNF generator initial version
1 parent d2eda28 commit 081caa0

File tree

5 files changed

+486
-87
lines changed

5 files changed

+486
-87
lines changed

package-lock.json

Lines changed: 12 additions & 86 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/langium-cli/src/generate.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
******************************************************************************/
66

77
import type { AstNode, Grammar, LangiumDocument, Mutable } from 'langium';
8-
import type { LangiumConfig, LangiumLanguageConfig} from './package-types.js';
8+
import type { LangiumConfig, LangiumLanguageConfig } from './package-types.js';
99
import { URI } from 'langium';
1010
import { loadConfig } from './package.js';
1111
import { AstUtils, GrammarAST } from 'langium';
@@ -14,6 +14,7 @@ import { NodeFileSystem } from 'langium/node';
1414
import { generateAst } from './generator/ast-generator.js';
1515
import { serializeGrammar } from './generator/grammar-serializer.js';
1616
import { generateModule } from './generator/module-generator.js';
17+
import { generateBnf } from './generator/bnf-generator.js';
1718
import { generateTextMate } from './generator/highlighting/textmate-generator.js';
1819
import { generateMonarch } from './generator/highlighting/monarch-generator.js';
1920
import { generatePrismHighlighting } from './generator/highlighting/prism-generator.js';
@@ -390,6 +391,15 @@ export async function runGenerator(config: LangiumConfig, options: GenerateOptio
390391
}
391392
}
392393
}
394+
395+
if (languageConfig?.bnf) {
396+
const genBnf = generateBnf([grammar], {
397+
dialect: languageConfig.bnf.dialect ?? 'GBNF',
398+
commentStyle: languageConfig.bnf.comment
399+
});
400+
const bnfPath = path.resolve(relPath, languageConfig.bnf.out ?? `${grammar.name ?? 'grammar'}.gbnf`);
401+
await writeWithFail(bnfPath, genBnf, options);
402+
}
393403
}
394404

395405
return buildResult(true);
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
/******************************************************************************
2+
* Copyright 2025 TypeFox GmbH
3+
* This program and the accompanying materials are made available under the
4+
* terms of the MIT License, which is available in the project root.
5+
******************************************************************************/
6+
7+
import { CstUtils, type Grammar } from 'langium';
8+
import { EOL } from 'langium/generate';
9+
import * as _ from 'lodash';
10+
import type { AbstractElement, AbstractRule, TerminalRule } from '../../../langium/lib/languages/generated/ast.js';
11+
import {
12+
isAction, isAlternatives, isAssignment, isCrossReference, isGroup, isKeyword, isParserRule, isRegexToken,
13+
isRuleCall, isTerminalAlternatives, isTerminalGroup, isTerminalRule, isTerminalRuleCall
14+
} from '../../../langium/lib/languages/generated/ast.js';
15+
16+
/**
 * Generates a BNF grammar definition from the given Langium grammars.
 *
 * Grammars without a name are skipped. Every rule of the remaining grammars is rendered
 * with `processRule` and followed by an empty line. If at least one hidden terminal rule
 * exists, a final `HIDDEN` rule listing all hidden terminals as alternatives is appended.
 *
 * @param grammars Grammars whose rules are rendered, in the given order.
 * @param options Dialect and optional comment style; defaults to the `GBNF` dialect.
 * @returns The generated grammar text, or an empty string if there is nothing to render.
 */
export function generateBnf(grammars: Grammar[], options: GeneratorOptions = { dialect: 'GBNF' }): string {
    const rules = grammars
        .filter(grammar => !!grammar.name)
        .flatMap(grammar => grammar.rules);
    const hiddenRules = rules.filter((rule): rule is TerminalRule => isTerminalRule(rule) && rule.hidden);

    const ctx: GeneratorContext = {
        // Starting with `rootAssigned` set for EBNF keeps `processRule` from renaming the entry rule to `root`.
        rootAssigned: options.dialect === 'EBNF',
        hasHiddenRules: hiddenRules.length > 0,
        dialect: options.dialect,
        commentStyle: options.commentStyle ?? (options.dialect === 'GBNF' ? 'hash' : 'parentheses')
    };

    // Each rule is followed by an empty line.
    let result = rules.map(rule => processRule(rule, ctx) + EOL + EOL).join('');

    if (hiddenRules.length > 0) {
        const alternatives = hiddenRules.map(rule => processName(rule.name, ctx)).join(' | ');
        result += `${processName('HIDDEN', ctx)} ::= ( ${alternatives} )${EOL}`;
    }
    return result;
}
44+
45+
/**
 * Renders a single grammar rule as `<comment><name> ::= <definition>`.
 *
 * The first entry parser rule seen while `ctx.rootAssigned` is still `false` is emitted
 * under the name `root`; `ctx.rootAssigned` is then set so that no later rule is renamed.
 *
 * @param rule Rule to render.
 * @param ctx Generator state; `rootAssigned` may be mutated by this call.
 * @returns The rendered rule.
 */
function processRule(rule: AbstractRule, ctx: GeneratorContext): string {
    // GBNF expects 'root' as the root rule name, Lark e.g. expects 'start'.
    let name = rule.name;
    if (!ctx.rootAssigned && isParserRule(rule) && rule.entry) {
        ctx.rootAssigned = true;
        name = 'root';
    }

    const ruleName = processName(name, ctx);
    const ruleComment = processComment(rule, ctx);
    // Only non-hidden terminal rules get the hidden-rule call in front of their definition.
    const hiddenPrefix = isTerminalRule(rule) && !rule.hidden ? hiddenRuleCall(ctx) : '';
    return `${ruleComment}${ruleName} ::= ${hiddenPrefix}${processElement(rule.definition, ctx)}`;
}
57+
58+
/**
 * Renders a grammar element as BNF text.
 *
 * Keywords become quoted literals preceded by the hidden-rule call, groups become
 * space-separated sequences, alternatives become `(a | b)`, rule calls become the processed
 * rule name, assignments and cross references render their terminal (cross references fall
 * back to `ID`), and actions render as an empty string. Unsupported element types are
 * reported on `console.error` and rendered as a `not-handled-(<type>)` placeholder.
 *
 * @param element Element to render.
 * @param ctx Generator context.
 * @returns The rendered element; may be empty.
 */
function processElement(element: AbstractElement, ctx: GeneratorContext): string {
    const joinChildren = (children: AbstractElement[], separator: string): string =>
        children.map(child => processElement(child, ctx)).filter(notEmpty).join(separator);

    if (isKeyword(element)) {
        return hiddenRuleCall(ctx) + quoteKeyword(element.value, ctx);
    }
    if (isGroup(element) || isTerminalGroup(element)) {
        const sequence = joinChildren(element.elements, ' ');
        // Parentheses are only emitted when a cardinality applies to the whole sequence.
        return element.cardinality ? `( ${sequence} )${processCardinality(element)}` : sequence;
    }
    if (isAssignment(element)) {
        return processElement(element.terminal, ctx) + processCardinality(element);
    }
    if (isRuleCall(element) || isTerminalRuleCall(element)) {
        // Fall back to the reference text when the rule reference is unresolved.
        return processName(element.rule.ref?.name ?? element.rule.$refText, ctx) + processCardinality(element);
    }
    if (isAlternatives(element) || isTerminalAlternatives(element)) {
        return '(' + joinChildren(element.elements, ' | ') + ')' + processCardinality(element);
    }
    if (isRegexToken(element)) {
        return stripRegexDelimiters(element.regex);
    }
    if (isCrossReference(element)) {
        return (element.terminal ? processElement(element.terminal, ctx) : 'ID') + processCardinality(element);
    }
    if (isAction(element)) {
        return '';
    }
    console.error(`Not handled AbstractElement type: ${element?.$type}`);
    return `not-handled-(${element?.$type})`;
}

/**
 * Wraps a keyword in double quotes. For GBNF, backslashes and double quotes inside the
 * keyword are escaped with a backslash so that the literal stays well-formed.
 */
function quoteKeyword(value: string, ctx: GeneratorContext): string {
    const text = ctx.dialect === 'GBNF' ? value.replace(/[\\"]/g, '\\$&') : value;
    return `"${text}"`;
}

/**
 * Removes the delimiters of a regex literal: the leading `/` and the trailing `/` together
 * with any flag letters (flags are dropped). Escaped slashes `\/` in the body are replaced
 * with plain slashes; all other escape sequences and unescaped slashes are kept as they are.
 */
function stripRegexDelimiters(regex: string): string {
    return regex
        .replace(/^\//, '')
        .replace(/\/[a-z]*$/, '')
        // Walk escape pairs left to right so that `\\` followed by `/` is not mistaken for `\/`.
        .replace(/\\(.)/g, (pair: string, escaped: string) => (escaped === '/' ? '/' : pair));
}
87+
88+
/**
 * Returns the cardinality of the given element as written in the grammar,
 * or an empty string if the element has no cardinality.
 */
function processCardinality(element: AbstractElement): string {
    return element.cardinality ?? '';
}
91+
92+
/**
 * Converts a rule name into the form used by the target dialect.
 *
 * @param name Rule name as declared in the grammar.
 * @param ctx Generator context; only `dialect` is read.
 * @returns The kebab-cased name for GBNF, `<name>` for EBNF, and the unchanged name otherwise.
 */
function processName(name: string, ctx: GeneratorContext): string {
    switch (ctx.dialect) {
        case 'GBNF':
            // Convert camel case to kebab case for GBNF (GGML AI).
            return _.kebabCase(name);
        case 'EBNF':
            return `<${name}>`;
        default:
            return name;
    }
}
103+
104+
/**
 * Renders the multi-line comment attached to a rule in the configured comment style.
 *
 * Line breaks inside the comment are replaced with spaces and the surrounding comment
 * delimiters are removed, so the result is a single line terminated by `EOL`.
 *
 * @param rule Rule whose `ML_COMMENT` node is looked up.
 * @param ctx Generator context; only `commentStyle` is read.
 * @returns The rendered comment line, or an empty string if the style is `skip` or the
 *          rule has no non-blank comment.
 */
function processComment(rule: AbstractRule, ctx: GeneratorContext): string {
    // `skip` disables comment generation entirely; nothing may be emitted in front of the rule name.
    if (ctx.commentStyle === 'skip') {
        return '';
    }
    const comment = CstUtils.findCommentNode(rule.$cstNode, ['ML_COMMENT'])?.text
        ?.replace(/\r?\n|\r/g, ' ') // Replace line breaks
        ?.replace(/^\/\*\s*/, '') // Remove leading `/*`
        ?.replace(/\s*\*\/$/, ''); // Remove trailing `*/`
    if (!comment || comment.trim().length === 0) {
        return '';
    }
    switch (ctx.commentStyle) {
        case 'parentheses':
            return `(* ${comment} *)${EOL}`;
        case 'slash':
            return `/* ${comment} */${EOL}`;
        case 'hash':
            return `# ${comment}${EOL}`;
        default:
            return '';
    }
}
123+
124+
/**
 * Generates a call to the `HIDDEN` rule with a trailing space, if there are hidden rules in the grammar.
 * The rule name is passed through `processName`, so its spelling depends on the dialect.
 * @param ctx GeneratorContext
 * @returns The processed `HIDDEN` rule name followed by `* ` if there are hidden rules in the grammar,
 *          otherwise an empty string.
 */
function hiddenRuleCall(ctx: GeneratorContext): string {
    if (!ctx.hasHiddenRules) {
        return '';
    }
    return processName('HIDDEN', ctx) + '* ';
}
132+
133+
/**
 * Tells whether the given text contains anything besides whitespace.
 */
function notEmpty(text: string): boolean {
    return text.trim().length > 0;
}
136+
137+
/**
 * Default: GBNF
 * EBNF doesn't support RegEx terminal rules.
 */
export type BnfDialect = 'GBNF' | 'EBNF';

/**
 * By default, comments are generated according to the dialect.
 * Use this option to force a specific comment style.
 * Use `parentheses` for `(* comment *)`, `slash` for `/* comment *\/`, `hash` for `# comment`
 * and `skip` to disable comment generation.
 */
export type CommentStyle = 'skip' | 'parentheses' | 'slash' | 'hash';

/**
 * Options accepted by the BNF generator.
 */
export type GeneratorOptions = {
    /** Dialect of the generated grammar. */
    dialect: BnfDialect;
    /** Comment style to use; if omitted, it is derived from the dialect. */
    commentStyle?: CommentStyle;
};

/**
 * Generator options plus the state shared between the rule and element processors.
 */
type GeneratorContext = GeneratorOptions & {
    /** Whether the root rule has already been assigned. */
    rootAssigned: boolean;
    /** Whether any of the processed grammars declares a hidden terminal rule. */
    hasHiddenRules: boolean;
    /** Resolved comment style; always set in the context. */
    commentStyle: CommentStyle;
};

packages/langium-cli/src/package-types.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,17 @@ export interface LangiumLanguageConfig {
6767
}
6868
/** Configure the chevrotain parser for a single language */
6969
chevrotainParserConfig?: IParserConfig
70+
/** Enable BNF file generation */
71+
bnf?: {
72+
/** Output path for the BNF file */
73+
out: string,
74+
/** Dialect of the generated BNF file. GBNF is the default. In EBNF RegEx terminals are not supported. */
75+
dialect?: 'GBNF' | 'EBNF',
76+
/**
77+
* By default, comments are generated according to the dialect. GBNF uses `#`, EBNF uses `(* *)`.
78+
* Use this option to force a specific comment style. Use `parentheses` for `(* comment *)`, `slash` for `/* comment *\/`,
79+
* `hash` for `# comment` and `skip` to disable comment generation.
80+
*/
81+
comment?: 'skip' | 'parentheses' | 'slash' | 'hash'
82+
}
7083
}

0 commit comments

Comments
 (0)