Skip to content

Commit 325a868

Browse files
committed
fix(section-splitter): track code fence state to prevent false heading detection
Lines such as `# vulcan.yaml` in YAML code blocks or `# Generate events` in shell code blocks were matching the heading pattern `/^(#{1,6})\s+(.+)$/`, which caused the section splitter to create section boundaries inside code blocks. As a result, code block content was rendered outside the fence, with only the language badges showing.

The fix implements proper fence tracking:

- Detects fence start/end with the pattern `` /^(`{3,}|~{3,})/ ``
- Tracks the fence character (a backtick or `~`) and the opening fence length (minimum 3)
- Enforces the CommonMark closing rules (same character, length >= opening length)
- Checks for headings only when `!inCodeFence`
1 parent b84cad7 commit 325a868

File tree

1 file changed

+48
-22
lines changed

1 file changed

+48
-22
lines changed

src/utils/section-splitter.ts

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,44 @@ export interface MarkdownSection {
1919
export function splitIntoSections(markdown: string): MarkdownSection[] {
2020
const lines = markdown.split('\n');
2121
const sections: MarkdownSection[] = [];
22-
22+
2323
let currentSection: string[] = [];
2424
let currentHeading: string | undefined;
2525
let currentLevel: number | undefined;
2626
let sectionStartLine = 0;
2727
let sectionId = 0;
28-
28+
29+
// Track fenced code block state
30+
let inCodeFence = false;
31+
let codeFenceChar = ''; // '`' or '~'
32+
let codeFenceLength = 0; // minimum 3
33+
2934
lines.forEach((line, index) => {
30-
// Check if line is a heading (ATX style: # Heading)
31-
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
32-
35+
// Check for code fence start/end (``` or ~~~)
36+
const fenceMatch = line.match(/^(`{3,}|~{3,})/);
37+
38+
if (fenceMatch) {
39+
const fenceStr = fenceMatch[1];
40+
const fenceChar = fenceStr[0];
41+
const fenceLen = fenceStr.length;
42+
43+
if (!inCodeFence) {
44+
// Starting a code fence
45+
inCodeFence = true;
46+
codeFenceChar = fenceChar;
47+
codeFenceLength = fenceLen;
48+
} else if (fenceChar === codeFenceChar && fenceLen >= codeFenceLength) {
49+
// Closing the code fence (must use same char and >= length)
50+
inCodeFence = false;
51+
codeFenceChar = '';
52+
codeFenceLength = 0;
53+
}
54+
// If different fence char or shorter length, it's content inside the fence
55+
}
56+
57+
// Only check for headings when NOT inside a code fence
58+
const headingMatch = !inCodeFence && line.match(/^(#{1,6})\s+(.+)$/);
59+
3360
if (headingMatch) {
3461
// Save previous section if it exists
3562
if (currentSection.length > 0) {
@@ -42,7 +69,7 @@ export function splitIntoSections(markdown: string): MarkdownSection[] {
4269
id: `section-${sectionId++}`,
4370
});
4471
}
45-
72+
4673
// Start new section
4774
currentSection = [line];
4875
currentHeading = headingMatch[2];
@@ -52,7 +79,7 @@ export function splitIntoSections(markdown: string): MarkdownSection[] {
5279
currentSection.push(line);
5380
}
5481
});
55-
82+
5683
// Add final section
5784
if (currentSection.length > 0) {
5885
sections.push({
@@ -64,7 +91,7 @@ export function splitIntoSections(markdown: string): MarkdownSection[] {
6491
id: `section-${sectionId++}`,
6592
});
6693
}
67-
94+
6895
return sections;
6996
}
7097

@@ -74,16 +101,16 @@ export function splitIntoSections(markdown: string): MarkdownSection[] {
74101
export function splitIntoChunks(markdown: string, chunkSize = 50000): MarkdownSection[] {
75102
const sections: MarkdownSection[] = [];
76103
const lines = markdown.split('\n');
77-
104+
78105
let currentChunk: string[] = [];
79106
let currentSize = 0;
80107
let chunkStartLine = 0;
81108
let chunkId = 0;
82-
109+
83110
lines.forEach((line, index) => {
84111
currentChunk.push(line);
85112
currentSize += line.length + 1; // +1 for newline
86-
113+
87114
// If chunk is large enough, save it
88115
if (currentSize >= chunkSize) {
89116
sections.push({
@@ -92,13 +119,13 @@ export function splitIntoChunks(markdown: string, chunkSize = 50000): MarkdownSe
92119
endLine: index,
93120
id: `chunk-${chunkId++}`,
94121
});
95-
122+
96123
currentChunk = [];
97124
currentSize = 0;
98125
chunkStartLine = index + 1;
99126
}
100127
});
101-
128+
102129
// Add remaining lines
103130
if (currentChunk.length > 0) {
104131
sections.push({
@@ -108,7 +135,7 @@ export function splitIntoChunks(markdown: string, chunkSize = 50000): MarkdownSe
108135
id: `chunk-${chunkId++}`,
109136
});
110137
}
111-
138+
112139
return sections;
113140
}
114141

@@ -120,33 +147,32 @@ export function getInitialSections(
120147
options: { maxSections?: number; maxSize?: number; upToSectionId?: string } = {}
121148
): MarkdownSection[] {
122149
const { maxSections = 3, maxSize = 30000, upToSectionId } = options;
123-
150+
124151
// If we need to render up to a specific section (for scroll restoration),
125152
// render all sections up to and including that one
126153
if (upToSectionId) {
127-
const targetIndex = sections.findIndex(s => s.id === upToSectionId);
154+
const targetIndex = sections.findIndex((s) => s.id === upToSectionId);
128155
if (targetIndex !== -1) {
129156
// Render all sections up to the target, plus one more for context
130157
return sections.slice(0, Math.min(targetIndex + 2, sections.length));
131158
}
132159
}
133-
160+
134161
const initial: MarkdownSection[] = [];
135162
let totalSize = 0;
136-
163+
137164
for (const section of sections) {
138165
if (initial.length >= maxSections) break;
139166
if (totalSize + section.markdown.length > maxSize) break;
140-
167+
141168
initial.push(section);
142169
totalSize += section.markdown.length;
143170
}
144-
171+
145172
// Always include at least one section
146173
if (initial.length === 0 && sections.length > 0) {
147174
initial.push(sections[0]);
148175
}
149-
176+
150177
return initial;
151178
}
152-

0 commit comments

Comments
 (0)