|
| 1 | +/****************************************************************************** |
| 2 | + * Copyright 2025 TypeFox GmbH |
| 3 | + * This program and the accompanying materials are made available under the |
| 4 | + * terms of the MIT License, which is available in the project root. |
| 5 | + ******************************************************************************/ |
| 6 | + |
| 7 | +import { CstUtils, type Grammar } from 'langium'; |
| 8 | +import { EOL } from 'langium/generate'; |
| 9 | +import * as _ from 'lodash'; |
| 10 | +import type { AbstractElement, AbstractRule, TerminalRule } from '../../../langium/lib/languages/generated/ast.js'; |
| 11 | +import { |
| 12 | + isAction, isAlternatives, isAssignment, isCrossReference, isGroup, isKeyword, isParserRule, isRegexToken, |
| 13 | + isRuleCall, isTerminalAlternatives, isTerminalGroup, isTerminalRule, isTerminalRuleCall |
| 14 | +} from '../../../langium/lib/languages/generated/ast.js'; |
| 15 | + |
/**
 * Generates the BNF text for the given Langium grammars.
 *
 * Grammars without a name are ignored. Every rule of the remaining grammars is rendered in
 * declaration order, each followed by an empty line. When at least one hidden terminal rule
 * exists, a final `HIDDEN` rule (named according to the dialect) is appended that lists all
 * hidden terminals as alternatives.
 *
 * @param grammars Grammars whose rules are rendered.
 * @param options Target dialect and optional comment style; defaults to the GBNF dialect.
 * @returns The generated grammar text.
 */
export function generateBnf(grammars: Grammar[], options: GeneratorOptions = { dialect: 'GBNF' }): string {
    const { dialect, commentStyle } = options;

    const rules = grammars
        .filter(grammar => Boolean(grammar.name))
        .flatMap(grammar => grammar.rules);
    const hiddenRules = rules.filter((rule): rule is TerminalRule => isTerminalRule(rule) && rule.hidden);

    const ctx: GeneratorContext = {
        // Only non-EBNF output renames the entry rule, so EBNF starts out as "already assigned".
        rootAssigned: dialect === 'EBNF',
        hasHiddenRules: hiddenRules.length > 0,
        dialect,
        commentStyle: commentStyle ?? (dialect === 'GBNF' ? 'hash' : 'parentheses')
    };

    // Rules must be processed in order: the first entry rule encountered claims the root name.
    const sections = rules.map(rule => processRule(rule, ctx) + EOL + EOL);

    if (hiddenRules.length > 0) {
        const hiddenName = processName('HIDDEN', ctx);
        const alternatives = hiddenRules.map(rule => processName(rule.name, ctx)).join(' | ');
        sections.push(`${hiddenName} ::= ( ${alternatives} )${EOL}`);
    }
    return sections.join('');
}
| 44 | + |
/**
 * Renders one grammar rule as `<comment><name> ::= <definition>`.
 *
 * The first parser rule flagged as entry rule is emitted under the name `root`, and
 * `ctx.rootAssigned` is set so that this happens at most once per generator run.
 * Non-hidden terminal rules get the hidden-rule call in front of their definition.
 *
 * @param rule Rule to render.
 * @param ctx Generator state; `rootAssigned` may be updated.
 * @returns The rule text without a trailing line break.
 */
function processRule(rule: AbstractRule, ctx: GeneratorContext): string {
    // GBNF expects 'root' as the root rule name, Lark e.g. expects 'start'.
    let displayName = rule.name;
    if (!ctx.rootAssigned && isParserRule(rule) && rule.entry) {
        ctx.rootAssigned = true;
        displayName = 'root';
    }

    const head = processName(displayName, ctx);
    const comment = processComment(rule, ctx);
    const isVisibleTerminal = isTerminalRule(rule) && !rule.hidden;
    const prefix = isVisibleTerminal ? hiddenRuleCall(ctx) : '';
    const body = processElement(rule.definition, ctx);

    return [comment, head, ' ::= ', prefix, body].join('');
}
| 57 | + |
/**
 * Renders a grammar element (and, recursively, its children) as right-hand side text of a BNF rule.
 *
 * - Keywords become quoted literals, preceded by the hidden-rule call if the grammar has hidden terminals.
 * - Groups are space separated and only parenthesized when they carry a cardinality.
 * - Alternatives are always parenthesized and separated by ` | `.
 * - Assignments and cross-references are reduced to their terminal (`ID` for a cross-reference without one).
 * - Rule calls are rendered as the dialect-specific name of the called rule.
 * - Regex tokens are emitted as their pattern, without delimiters and flags.
 * - Actions produce no output.
 *
 * Any other element type is reported via `console.error` and rendered as `not-handled-(<type>)`.
 *
 * @param element Element to render.
 * @param ctx Generator state (dialect, hidden-rule information).
 * @returns The rendered text; empty for elements that produce no output.
 */
function processElement(element: AbstractElement, ctx: GeneratorContext): string {
    const processRecursively = (child: AbstractElement): string => processElement(child, ctx);

    if (isKeyword(element)) {
        // The cardinality of an unassigned keyword (e.g. `';'?`) lives on the keyword itself.
        return renderKeyword(element.value, processCardinality(element), ctx);
    } else if (isGroup(element) || isTerminalGroup(element)) {
        const sequence = element.elements.map(processRecursively).filter(notEmpty).join(' ');
        return element.cardinality ? `( ${sequence} )${processCardinality(element)}` : sequence;
    } else if (isAssignment(element)) {
        const cardinality = processCardinality(element);
        if (cardinality && isKeyword(element.terminal)) {
            // Let the keyword renderer group the hidden prefix together with the literal.
            return renderKeyword(element.terminal.value, cardinality, ctx);
        }
        return processRecursively(element.terminal) + cardinality;
    } else if (isRuleCall(element) || isTerminalRuleCall(element)) {
        return processName(element.rule.ref?.name ?? element.rule.$refText, ctx) + processCardinality(element);
    } else if (isAlternatives(element) || isTerminalAlternatives(element)) {
        return '(' + element.elements.map(processRecursively).filter(notEmpty).join(' | ') + ')' + processCardinality(element);
    } else if (isRegexToken(element)) {
        return stripRegexDelimiters(element.regex);
    } else if (isCrossReference(element)) {
        return (element.terminal ? processRecursively(element.terminal) : 'ID') + processCardinality(element);
    } else if (isAction(element)) {
        return '';
    }
    console.error(`Not handled AbstractElement type: ${element?.$type}`);
    return `not-handled-(${element?.$type})`;
}

/**
 * Renders a keyword literal including the hidden-rule call and its cardinality.
 * With both a hidden prefix and a cardinality, prefix and literal are grouped so that the
 * cardinality applies to the pair (`( HIDDEN* "x" )*`) instead of the literal alone.
 */
function renderKeyword(value: string, cardinality: string, ctx: GeneratorContext): string {
    const hiddenPrefix = hiddenRuleCall(ctx);
    const literal = quoteKeyword(value, ctx);
    if (cardinality && hiddenPrefix) {
        return `( ${hiddenPrefix}${literal} )${cardinality}`;
    }
    return hiddenPrefix + literal + cardinality;
}

/**
 * Quotes a keyword value as a string literal of the target dialect.
 * GBNF uses backslash escapes for `"` and `\`. The other dialect has no escape sequences,
 * so a value containing `"` (and no `'`) is delimited with single quotes instead.
 */
function quoteKeyword(value: string, ctx: GeneratorContext): string {
    if (ctx.dialect === 'GBNF') {
        return `"${value.replace(/["\\]/g, '\\$&')}"`;
    }
    return value.includes('"') && !value.includes("'") ? `'${value}'` : `"${value}"`;
}

/**
 * Removes the enclosing `/` delimiters and any trailing flags from a regex literal text
 * and replaces escaped slashes `\/` by plain slashes `/`.
 */
function stripRegexDelimiters(regex: string): string {
    // Flags never contain a slash, so the last slash is the closing delimiter.
    const closing = regex.lastIndexOf('/');
    const pattern = regex.startsWith('/') && closing > 0 ? regex.slice(1, closing) : regex;
    return pattern.replace(/\\\//g, '/');
}
| 87 | + |
/**
 * Returns the cardinality marker of the given element.
 *
 * @param element Element whose `cardinality` property is read.
 * @returns The cardinality as stored on the element, or an empty string if it has none.
 */
function processCardinality(element: AbstractElement): string {
    const { cardinality } = element;
    if (cardinality === undefined || cardinality === null) {
        return '';
    }
    return cardinality;
}
| 91 | + |
/**
 * Converts a rule name into the notation of the target dialect.
 *
 * @param name Rule name as declared in the grammar.
 * @param ctx Generator state providing the dialect.
 * @returns The kebab-cased name for GBNF, `<name>` for EBNF, and the unchanged name otherwise.
 */
function processName(name: string, ctx: GeneratorContext): string {
    const { dialect } = ctx;
    if (dialect === 'GBNF') {
        // convert camel case to Kebab Case for GBNF (GGML AI)
        return _.kebabCase(name);
    }
    if (dialect === 'EBNF') {
        return '<' + name + '>';
    }
    return name;
}
| 103 | + |
/**
 * Renders the multi-line comment (`ML_COMMENT`) attached to a rule in the configured comment style.
 *
 * The comment delimiters and the leading `*` of continuation lines are removed, and all lines
 * are joined into a single line.
 *
 * @param rule Rule whose comment is looked up via its CST node.
 * @param ctx Generator state providing the comment style.
 * @returns The comment followed by a line break, or an empty string if the style is `skip`
 *          or the rule has no (non-blank) comment.
 */
function processComment(rule: AbstractRule, ctx: GeneratorContext): string {
    if (ctx.commentStyle === 'skip') {
        // Nothing is emitted at all; in particular no stray whitespace in front of the rule name.
        return '';
    }

    const raw = CstUtils.findCommentNode(rule.$cstNode, ['ML_COMMENT'])?.text ?? '';
    // Strip `/*` / `/**` and `*/` in one step so that degenerate comments like `/**/` end up empty.
    const inner = /^\/\*+([\s\S]*?)\*+\/$/.exec(raw)?.[1] ?? raw;
    const comment = inner
        .split(/\r?\n|\r/)
        // Continuation lines of JSDoc-style comments start with a decorative `*`.
        .map((line, index) => (index === 0 ? line : line.replace(/^\s*\*/, '')).trim())
        .filter(line => line.length > 0)
        .join(' ');
    if (comment.length === 0) {
        return '';
    }

    switch (ctx.commentStyle) {
        case 'parentheses':
            return `(* ${comment} *)${EOL}`;
        case 'slash':
            return `/* ${comment} */${EOL}`;
        case 'hash':
            return `# ${comment}${EOL}`;
        default:
            return '';
    }
}
| 123 | + |
/**
 * Produces a zero-or-more call to the `HIDDEN` rule followed by a single space.
 * The rule name is rendered for the current dialect via `processName`.
 * @param ctx GeneratorContext
 * @returns The call text (e.g. `<HIDDEN>* ` for EBNF) if the grammar has hidden rules, otherwise an empty string.
 */
function hiddenRuleCall(ctx: GeneratorContext): string {
    if (!ctx.hasHiddenRules) {
        return '';
    }
    return `${processName('HIDDEN', ctx)}* `;
}
| 132 | + |
/**
 * Tells whether the given text contains at least one non-whitespace character.
 *
 * @param text Text to inspect.
 * @returns `false` for empty or whitespace-only text, `true` otherwise.
 */
function notEmpty(text: string): boolean {
    return /\S/.test(text);
}
| 136 | + |
/**
 * Output dialects supported by the generator. The default is GBNF.
 * Note that EBNF doesn't support RegEx terminal rules.
 */
export type BnfDialect =
    | 'GBNF'
    | 'EBNF';
| 142 | + |
/**
 * Comment notation used in the generated grammar.
 * When no style is given, it is chosen according to the dialect; set it to force a specific one:
 * - `parentheses` renders `(* comment *)`
 * - `slash` renders `/* comment *\/`
 * - `hash` renders `# comment`
 * - `skip` disables comment generation
 */
export type CommentStyle =
    | 'skip'
    | 'parentheses'
    | 'slash'
    | 'hash';
| 150 | + |
/**
 * Options accepted by the BNF generator.
 */
export type GeneratorOptions = {
    /** Comment notation to use; when omitted it is derived from the dialect. */
    commentStyle?: CommentStyle;
    /** Target dialect of the generated grammar. */
    dialect: BnfDialect;
};
| 155 | + |
/**
 * Internal generator state: the options with a resolved comment style, plus bookkeeping
 * that is shared between the rule-processing functions.
 */
type GeneratorContext = GeneratorOptions & {
    /** Comment style after applying the dialect-dependent default. */
    commentStyle: CommentStyle;
    /** Whether any processed grammar declares a hidden terminal rule. */
    hasHiddenRules: boolean;
    /** Whether the root rule name has already been handed out (or must not be handed out). */
    rootAssigned: boolean;
};
0 commit comments