Skip to content

Commit e0c6aa4

Browse files
authored
Merge pull request #26 from theodevelop/dev
fix: resolve issues #21 #22 #23 — mid-rule actions, %token alias, flex SC blocks
2 parents a1d811a + 5b71d6d commit e0c6aa4

File tree

6 files changed

+253
-43
lines changed

6 files changed

+253
-43
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,16 @@
22

33
All notable changes to the **Bison/Flex Language Support** extension will be documented in this file.
44

5+
## [1.4.1] - 2026-03-31
6+
7+
### Fixed
8+
9+
- **Bison — mid-rule action `$N` out-of-bounds** (#21): Action blocks `{ }` embedded in the middle of a production are now counted as grammar symbols in Bison's `$N` numbering. Previously they were silently stripped, causing false-positive `bison/out-of-bounds` errors and missed real out-of-bounds accesses.
10+
- **Bison — `%token` numeric value and string alias** (#22): `%token NAME NUMBER "alias"` is now parsed in the correct order (numeric value before string alias). Previously, words inside the alias string were misidentified as token names, generating spurious `bison/undeclared-token` and `bison/unused-token` diagnostics.
11+
- **Flex — `<SC>{ }` block syntax** (#23): Rules grouped inside a `<SC1,SC2>{ ... }` block now correctly inherit their start conditions. Previously, the block header was misidentified as a rule pattern, suppressing all rules inside it and generating false `flex/unreachable-rule` and `flex/unused-sc` diagnostics.
12+
13+
---
14+
515
## [1.4.0] - 2026-03-30
616

717
### Added

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "bison-flex-lang",
33
"displayName": "Bison/Flex Language Support",
44
"description": "Full-featured language support for GNU Bison (.y, .yy) and Flex/RE-flex (.l, .ll) — syntax highlighting with embedded C/C++, real-time diagnostics, intelligent autocompletion, and hover documentation for all directives.",
5-
"version": "1.4.0",
5+
"version": "1.4.1",
66
"publisher": "theodevelop",
77
"license": "MIT",
88
"repository": {

server/src/parser/bisonParser.ts

Lines changed: 93 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,23 @@ export function parseBisonDocument(text: string): BisonDocument {
335335
}
336336
}
337337

338-
// Track braces
338+
// Track braces — and detect when a multi-line action block opens.
339+
// When braceDepth goes from 0 to >0 across a line, an action block opened on that line is still unclosed at its end.
340+
// Bison counts each action block as a grammar symbol ($N position), so we
341+
// add a '__midaction__' sentinel to the current alternative's symbol list.
342+
// Inline balanced blocks (e.g. `{ $$ = $1; }` on the same line) are already
343+
// counted by extractSymbols; only the unbalanced-open case needs handling here.
344+
const prevDepth = braceDepth;
339345
for (const ch of line) {
340346
if (ch === '{') braceDepth++;
341347
if (ch === '}') braceDepth = Math.max(0, braceDepth - 1);
342348
}
349+
if (prevDepth === 0 && braceDepth > 0 && currentRule) {
350+
const curRule = doc.rules.get(currentRule);
351+
if (curRule && curRule.alternatives.length > 0) {
352+
curRule.alternatives[curRule.alternatives.length - 1].symbols.push('__midaction__');
353+
}
354+
}
343355
}
344356

345357
return doc;
@@ -388,7 +400,7 @@ function replaceStringLiterals(text: string): string {
388400
*/
389401
function extractSymbols(text: string): string[] {
390402
const cleaned = replaceStringLiterals(text)
391-
.replace(/\{[^}]*\}/g, ' ') // remove inline actions
403+
.replace(/\{[^}]*\}/g, ' __midaction__ ') // inline actions count as a symbol ($N position)
392404
.replace(/%prec\s+\S+/g, ' ') // remove %prec TOKEN
393405
.replace(/%empty/g, ' ') // remove %empty
394406
.replace(/\/\/.*$/g, ' ') // remove line comments
@@ -422,23 +434,85 @@ function getFirstSymbol(text: string): string | undefined {
422434
}
423435

424436
function parseTokenNames(text: string, type: string | undefined, lineNum: number, doc: BisonDocument, colOffset: number = 0): void {
425-
// Match patterns like: NAME "alias" VALUE or just NAME
426-
// Use [a-zA-Z_][a-zA-Z0-9_]* to support lowercase letters and digits in token names.
427-
const regex = /([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:("(?:[^"\\]|\\.)*")\s*)?(?:(\d+)\s*)?/g;
428-
let match: RegExpExecArray | null;
429-
while ((match = regex.exec(text)) !== null) {
430-
const name = match[1];
431-
const alias = match[2]?.replace(/"/g, '');
432-
const value = match[3] ? parseInt(match[3]) : undefined;
433-
const col = colOffset + match.index;
434-
const decl: TokenDeclaration = {
435-
name,
436-
type,
437-
alias,
438-
location: Range.create(lineNum, col, lineNum, col + name.length),
439-
value,
440-
};
441-
doc.tokens.set(name, decl);
437+
// Bison token declaration syntax: NAME [NUMBER] ["alias"] (repeating)
438+
// The optional NUMBER comes BEFORE the optional "alias".
439+
// We use a character scanner to correctly skip string literals and numeric values
440+
// so that words inside "end of file" are not mistaken for token names.
441+
let pos = 0;
442+
443+
while (pos < text.length) {
444+
// Skip whitespace
445+
while (pos < text.length && (text[pos] === ' ' || text[pos] === '\t')) pos++;
446+
if (pos >= text.length) break;
447+
448+
const ch = text[pos];
449+
450+
// Skip string literals (these are aliases for the previous token, not new token names)
451+
if (ch === '"') {
452+
pos++;
453+
while (pos < text.length && text[pos] !== '"') {
454+
if (text[pos] === '\\') pos++; // skip escaped character
455+
pos++;
456+
}
457+
pos++; // skip closing quote
458+
continue;
459+
}
460+
461+
// Skip numeric token values
462+
if (ch >= '0' && ch <= '9') {
463+
while (pos < text.length && text[pos] >= '0' && text[pos] <= '9') pos++;
464+
continue;
465+
}
466+
467+
// Match identifier (token name)
468+
if (/[a-zA-Z_]/.test(ch)) {
469+
const nameStart = pos;
470+
while (pos < text.length && /[a-zA-Z0-9_]/.test(text[pos])) pos++;
471+
const name = text.substring(nameStart, pos);
472+
const col = colOffset + nameStart;
473+
474+
// Peek ahead: optional NUMBER then optional "alias"
475+
let peekPos = pos;
476+
let alias: string | undefined;
477+
let value: number | undefined;
478+
479+
// Skip whitespace
480+
while (peekPos < text.length && (text[peekPos] === ' ' || text[peekPos] === '\t')) peekPos++;
481+
482+
// Optional numeric token code (e.g. %token TOKEN_EOF 0 "end of file")
483+
if (peekPos < text.length && text[peekPos] >= '0' && text[peekPos] <= '9') {
484+
const numStart = peekPos;
485+
while (peekPos < text.length && text[peekPos] >= '0' && text[peekPos] <= '9') peekPos++;
486+
value = parseInt(text.substring(numStart, peekPos), 10);
487+
pos = peekPos;
488+
while (peekPos < text.length && (text[peekPos] === ' ' || text[peekPos] === '\t')) peekPos++;
489+
}
490+
491+
// Optional string alias (e.g. %token PLUS "+" or %token TOKEN_EOF 0 "end of file")
492+
if (peekPos < text.length && text[peekPos] === '"') {
493+
peekPos++; // skip opening quote
494+
const aliasStart = peekPos;
495+
while (peekPos < text.length && text[peekPos] !== '"') {
496+
if (text[peekPos] === '\\') peekPos++; // skip escaped character
497+
peekPos++;
498+
}
499+
alias = text.substring(aliasStart, peekPos);
500+
peekPos++; // skip closing quote
501+
pos = peekPos;
502+
}
503+
504+
doc.tokens.set(name, {
505+
name,
506+
type,
507+
alias,
508+
location: Range.create(lineNum, col, lineNum, col + name.length),
509+
value,
510+
});
511+
continue;
512+
}
513+
514+
// Skip any other character (e.g. punctuation, stray closing >)
515+
pos++;
442516
}
443517
}
444518

server/src/parser/flexParser.ts

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,17 @@ export function parseFlexDocument(text: string): FlexDocument {
198198
}
199199

200200
// Phase 3: Parse rules section
201-
let braceDepth = 0;
201+
//
202+
// Two separate pieces of nesting state are needed (a depth counter and a stack):
203+
// actionDepth — depth of C action blocks ({ ... }); content is skipped
204+
// scBlockStack — stack of start-condition lists for <SC>{...} blocks;
205+
// rules INSIDE these blocks inherit the SC names.
206+
//
207+
// A <SC>{...} block is NOT an action block: its content is Flex rules.
208+
// Action blocks nested inside a <SC> block increment actionDepth as usual.
209+
let actionDepth = 0;
210+
const scBlockStack: string[][] = []; // each entry = SC list for one nesting level
211+
let pendingScHeader: string | null = null; // accumulates multi-line <SC1,\nSC2>{ headers
202212
inBlockComment = false;
203213

204214
for (let i = rulesStart; i < rulesEnd; i++) {
@@ -218,16 +228,70 @@ export function parseFlexDocument(text: string): FlexDocument {
218228
// Skip empty lines and line comments
219229
if (!trimmed || trimmed.startsWith('//')) continue;
220230

221-
// Skip action blocks (brace-delimited C code)
222-
if (braceDepth > 0) {
231+
// ── Handle multi-line <SC1,\nSC2>{ header continuation ────────────────────
232+
if (pendingScHeader !== null) {
233+
const closeIdx = trimmed.indexOf('>');
234+
if (closeIdx >= 0) {
235+
// Collect any additional SC names before the >
236+
const before = trimmed.substring(0, closeIdx);
237+
const moreConds = before.match(/[A-Z_][A-Z0-9_]*/g);
238+
if (moreConds) pendingScHeader += ',' + moreConds.join(',');
239+
const conds = pendingScHeader.replace(/^,+/, '').split(',').filter(s => s.length > 0);
240+
pendingScHeader = null;
241+
// Expect '{' right after '>' to open the SC block
242+
const after = trimmed.substring(closeIdx + 1).trim();
243+
if (after === '{') {
244+
scBlockStack.push(conds);
245+
// actionDepth stays 0; the { is the SC block opening, not an action block
246+
}
247+
} else {
248+
// Still accumulating conditions from this line
249+
const moreConds = trimmed.match(/[A-Z_][A-Z0-9_]*/g);
250+
if (moreConds) pendingScHeader += ',' + moreConds.join(',');
251+
}
252+
continue;
253+
}
254+
255+
// ── Skip C action blocks ───────────────────────────────────────────────────
256+
if (actionDepth > 0) {
223257
for (const ch of line) {
224-
if (ch === '{') braceDepth++;
225-
if (ch === '}') braceDepth = Math.max(0, braceDepth - 1);
258+
if (ch === '{') actionDepth++;
259+
if (ch === '}') actionDepth = Math.max(0, actionDepth - 1);
226260
}
227261
continue;
228262
}
229263

230-
// Extract start condition references: <SC_NAME> or <SC1,SC2>
264+
// ── SC block closing } (at SC block level, actionDepth === 0) ─────────────
265+
if (scBlockStack.length > 0 && trimmed === '}') {
266+
scBlockStack.pop();
267+
continue;
268+
}
269+
270+
// ── SC block opener: <SC1,SC2>{ ───────────────────────────────────────────
271+
// Single-line header: <SC1,SC2>{ or <SC1,SC2> {
272+
{
273+
const scBlockMatch = trimmed.match(/^<([A-Z_][A-Z0-9_]*(?:,[A-Z_][A-Z0-9_]*)*)>\s*\{/);
274+
if (scBlockMatch) {
275+
const conds = scBlockMatch[1].split(',');
276+
scBlockStack.push(conds);
277+
// Record the start condition references from the block header line
278+
for (const cond of conds) {
279+
const col = line.indexOf(cond);
280+
const range = Range.create(i, col >= 0 ? col : 0, i, (col >= 0 ? col : 0) + cond.length);
281+
if (!doc.startConditionRefs.has(cond)) doc.startConditionRefs.set(cond, []);
282+
doc.startConditionRefs.get(cond)!.push(range);
283+
}
284+
continue;
285+
}
286+
// Multi-line header start: <SC1, (no closing > on this line)
287+
const scMultiStart = trimmed.match(/^<([A-Z_][A-Z0-9_]*(?:,[A-Z_][A-Z0-9_]*)*,\s*)$/);
288+
if (scMultiStart) {
289+
pendingScHeader = scMultiStart[1].replace(/,\s*$/, '');
290+
continue;
291+
}
292+
}
293+
294+
// ── Extract start condition references: <SC_NAME> or <SC1,SC2> ────────────
231295
// Exclude <<EOF>> which is a special pattern, not a start condition
232296
const scRefs = line.matchAll(/(?<!<)<([A-Z_][A-Z0-9_]*(?:,[A-Z_][A-Z0-9_]*)*)>(?!>)/g);
233297
for (const m of scRefs) {
@@ -242,7 +306,7 @@ export function parseFlexDocument(text: string): FlexDocument {
242306
}
243307
}
244308

245-
// Extract abbreviation references: {name} (but not C code {})
309+
// ── Extract abbreviation references: {name} (but not C code {}) ───────────
246310
// Only match {name} where name is a valid identifier
247311
const abbrRefs = line.matchAll(/\{([a-zA-Z_][a-zA-Z0-9_]*)\}/g);
248312
for (const m of abbrRefs) {
@@ -259,11 +323,15 @@ export function parseFlexDocument(text: string): FlexDocument {
259323
}
260324
}
261325

262-
// Build rule entry
263-
const startConditions: string[] = [];
326+
// ── Build rule entry ───────────────────────────────────────────────────────
327+
// Start conditions: explicit <SC> prefix on this line PLUS any inherited from <SC>{ block
328+
const inherited = scBlockStack.length > 0 ? scBlockStack[scBlockStack.length - 1] : [];
329+
const startConditions: string[] = [...inherited];
264330
const scMatch = trimmed.match(/^<([A-Z_][A-Z0-9_]*(?:,[A-Z_][A-Z0-9_]*)*)>/);
265331
if (scMatch) {
266-
startConditions.push(...scMatch[1].split(','));
332+
for (const c of scMatch[1].split(',')) {
333+
if (!startConditions.includes(c)) startConditions.push(c);
334+
}
267335
}
268336

269337
doc.rules.push({
@@ -272,10 +340,10 @@ export function parseFlexDocument(text: string): FlexDocument {
272340
location: Range.create(i, 0, i, line.length),
273341
});
274342

275-
// Track braces for action blocks
343+
// ── Track action brace depth ───────────────────────────────────────────────
276344
for (const ch of line) {
277-
if (ch === '{') braceDepth++;
278-
if (ch === '}') braceDepth = Math.max(0, braceDepth - 1);
345+
if (ch === '{') actionDepth++;
346+
if (ch === '}') actionDepth = Math.max(0, actionDepth - 1);
279347
}
280348
}
281349

tests/test-diagnostic-codes.ts

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,9 @@ console.log('\n=== TEST: Bison diagnostic codes ===');
111111
}
112112

113113
// out-of-bounds + href
114+
// The parser counts rule `expr : A { $3; }` as 2 symbols: A(1) + the action block(2) — here the end-of-rule action, which extractSymbols also counts. $3 > 2 → OOB.
114115
{
115-
const src = `%token A\n%%\nexpr : A { $2; } ;\n%%\n`;
116+
const src = `%token A\n%%\nexpr : A { $3; } ;\n%%\n`;
116117
const doc = parseBisonDocument(src);
117118
const diags = computeBisonDiagnostics(doc, src);
118119
const d = diags.find(x => x.message.includes('out of bounds'));
@@ -321,6 +322,61 @@ const bisonDocCross = parseBisonDocument(bisonSrcCross);
321322
assert(d?.source === 'flex', 'missing-grammar-token source is flex', d?.source);
322323
}
323324

325+
// ─────────────────────────────────────────────────────────────────────────────
326+
// 5. Regression tests for reported bugs
327+
// ─────────────────────────────────────────────────────────────────────────────
328+
console.log('\n=== TEST: Bug regressions ===');
329+
330+
// Issue #22 — %token with numeric value + string alias: words inside "end of file"
331+
// must NOT be treated as token names.
332+
{
333+
const src = '%token TOKEN_EOF 0 "end of file"\n%token THREEDIMENSIONAL "3D"\n%token ACTUAL\n%%\nexpr : TOKEN_EOF THREEDIMENSIONAL ACTUAL ;\n%%\n';
334+
const doc = parseBisonDocument(src);
335+
assert(doc.tokens.has('TOKEN_EOF'), '#22 TOKEN_EOF is declared');
336+
assert(doc.tokens.get('TOKEN_EOF')?.value === 0, '#22 TOKEN_EOF value = 0');
337+
assert(doc.tokens.get('TOKEN_EOF')?.alias === 'end of file', '#22 TOKEN_EOF alias = "end of file"');
338+
assert(!doc.tokens.has('end'), '#22 "end" is NOT a token (was inside alias)');
339+
assert(!doc.tokens.has('of'), '#22 "of" is NOT a token');
340+
assert(!doc.tokens.has('file'), '#22 "file" is NOT a token');
341+
assert(doc.tokens.has('THREEDIMENSIONAL'), '#22 THREEDIMENSIONAL is declared');
342+
assert(doc.tokens.get('THREEDIMENSIONAL')?.alias === '3D', '#22 THREEDIMENSIONAL alias = "3D"');
343+
assert(doc.tokens.has('ACTUAL'), '#22 ACTUAL is declared');
344+
const diags22 = computeBisonDiagnostics(doc, src);
345+
const unusedTokenDiags = diags22.filter(d => d.code === 'bison/unused-token');
346+
assert(unusedTokenDiags.length === 0, '#22 no false bison/unused-token diagnostics');
347+
}
348+
349+
// Issue #21 — mid-rule action blocks count as grammar symbols.
350+
// In `testrule: A B { } D { $4 }`, $4 refers to D (symbol #4), not out of bounds.
351+
{
352+
const src = '%token A B D\n%%\ntestrule : A B { } D { $4; } ;\n%%\n';
353+
const doc = parseBisonDocument(src);
354+
const diags21 = computeBisonDiagnostics(doc, src);
355+
const oob = diags21.filter(d => d.code === 'bison/out-of-bounds');
356+
assert(oob.length === 0, '#21 $4 in rule with mid-action is not out-of-bounds (A=1 B=2 {action}=3 D=4)');
357+
}
358+
{
359+
// $6 IS out of bounds: A(1) B(2) {action}(3) D(4) {action2}(5) — only 5 symbols
360+
const src = '%token A B D\n%%\ntestrule : A B { } D { $6; } ;\n%%\n';
361+
const doc = parseBisonDocument(src);
362+
const diags21b = computeBisonDiagnostics(doc, src);
363+
const oob5 = diags21b.filter(d => d.code === 'bison/out-of-bounds');
364+
assert(oob5.length === 1, '#21 $6 IS out-of-bounds (5 symbols: A B {action} D {action2})');
365+
}
366+
367+
// Issue #23 — rules inside a <SC>{ ... } block inherit the start condition.
368+
// A catch-all `.` in INITIAL should NOT shadow rules in an exclusive SC block.
369+
{
370+
const src = `%x MY_STATE\n%%\n.\t{}\n<MY_STATE>{\n [a-z]+ {}\n [0-9]+ {}\n}\n%%\n`;
371+
const doc = require('../server/src/parser/flexParser').parseFlexDocument(src);
372+
// Rules inside <MY_STATE>{ } should have startConditions = ['MY_STATE'], not []
373+
const rulesInBlock = (doc.rules as { startConditions: string[] }[]).filter(r => r.startConditions.includes('MY_STATE'));
374+
assert(rulesInBlock.length === 2, '#23 rules inside <SC>{ block inherit start condition');
375+
const diags23 = computeFlexDiagnostics(doc, src);
376+
const unreachable = diags23.filter(d => d.code === 'flex/unreachable-rule');
377+
assert(unreachable.length === 0, '#23 no false flex/unreachable-rule for exclusive SC block');
378+
}
379+
324380
// ─────────────────────────────────────────────────────────────────────────────
325381
// Results
326382
// ─────────────────────────────────────────────────────────────────────────────

0 commit comments

Comments
 (0)