Skip to content

Commit 1bc0213

Browse files
author
Eric Wheeler
committed
fix: markdown parser not detecting sections with horizontal rules
The markdownParser incorrectly interpreted horizontal rules (---) as setext header underlines when they appeared after non-header text, which caused some sections to be missing from the output.

This fix:
- Makes setext header detection stricter by requiring at least 3 = or - characters in the underline
- Adds validation for the text line that precedes a potential setext header underline
- Ensures horizontal rules are not confused with setext headers

Added a test case to verify that the fix works correctly with horizontal rules.

Signed-off-by: Eric Wheeler <[email protected]>
1 parent 43d554a commit 1bc0213

File tree

2 files changed

+43
-4
lines changed

2 files changed

+43
-4
lines changed

src/services/tree-sitter/__tests__/markdownParser.test.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,42 @@ Line 5`
462462
}
463463
}
464464
})
465+
466+
it("should correctly handle horizontal rules and not confuse them with setext headers", () => {
467+
const content = `## Section Header
468+
469+
Some content here.
470+
471+
## License
472+
473+
[Apache 2.0 © 2025 Roo Veterinary, Inc.](./LICENSE)
474+
475+
---
476+
477+
**Enjoy Roo Code!** Whether you keep it on a short leash or let it roam autonomously, we can't wait to see what you build.`
478+
479+
const captures = parseMarkdown(content)
480+
expect(captures).toBeDefined()
481+
482+
// Format with default minSectionLines = 4
483+
const formatted = formatMarkdownCaptures(captures)
484+
expect(formatted).toBeDefined()
485+
expect(formatted).toContain("## Section Header")
486+
expect(formatted).toContain("## License")
487+
488+
// Verify that the horizontal rule is not treated as a setext header
489+
const licenseCapture = captures.find((c) => c.node.text === "License")
490+
expect(licenseCapture).toBeDefined()
491+
492+
// Check that the License section extends past the horizontal rule
493+
const licenseCaptureIndex = captures.findIndex((c) => c.node.text === "License")
494+
if (licenseCaptureIndex !== -1 && licenseCaptureIndex + 1 < captures.length) {
495+
const licenseDefinitionCapture = captures[licenseCaptureIndex + 1]
496+
expect(licenseDefinitionCapture.node.endPosition.row).toBeGreaterThan(
497+
content.split("\n").findIndex((line) => line === "---"),
498+
)
499+
}
500+
})
465501
})
466502

467503
// Helper function to mimic the processCaptures function from index.ts

src/services/tree-sitter/markdownParser.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,11 @@ export function parseMarkdown(content: string): MockCapture[] {
4242

4343
// Regular expressions for different header types
4444
const atxHeaderRegex = /^(#{1,6})\s+(.+)$/
45-
const setextH1Regex = /^=+\s*$/
46-
const setextH2Regex = /^-+\s*$/
45+
// Setext headers must have at least 3 = or - characters
46+
const setextH1Regex = /^={3,}\s*$/
47+
const setextH2Regex = /^-{3,}\s*$/
48+
// Valid setext header text line should be plain text (not empty, not indented, not a special element)
49+
const validSetextTextRegex = /^\s*[^#<>!\[\]`\t]+[^\n]$/
4750

4851
// Find all headers in the document
4952
for (let i = 0; i < lines.length; i++) {
@@ -80,7 +83,7 @@ export function parseMarkdown(content: string): MockCapture[] {
8083
// Check for setext headers (underlined headers)
8184
if (i > 0) {
8285
// Check for H1 (======)
83-
if (setextH1Regex.test(line)) {
86+
if (setextH1Regex.test(line) && validSetextTextRegex.test(lines[i - 1])) {
8487
const text = lines[i - 1].trim()
8588

8689
// Create a mock node for this header
@@ -106,7 +109,7 @@ export function parseMarkdown(content: string): MockCapture[] {
106109
}
107110

108111
// Check for H2 (------)
109-
if (setextH2Regex.test(line)) {
112+
if (setextH2Regex.test(line) && validSetextTextRegex.test(lines[i - 1])) {
110113
const text = lines[i - 1].trim()
111114

112115
// Create a mock node for this header

0 commit comments

Comments
 (0)