Skip to content

Commit 43d554a

Browse files
author
Eric Wheeler
committed
feat: implement tree-sitter compatible markdown processor
Adds a special-case implementation for Markdown files that:
- Parses Markdown headers and section line ranges
- Returns captures in a format compatible with tree-sitter
- Integrates with the existing parseFile function
- Includes comprehensive tests for the implementation

Signed-off-by: Eric Wheeler <[email protected]>
1 parent f4524fb commit 43d554a

File tree

4 files changed

+1011
-126
lines changed

4 files changed

+1011
-126
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import { describe, expect, it, jest, beforeEach } from "@jest/globals"
2+
import * as fs from "fs/promises"
3+
import * as path from "path"
4+
import { parseSourceCodeDefinitionsForFile } from "../index"
5+
6+
// Mock fs.readFile
7+
// Replace fs/promises with stubs so no test touches the real filesystem.
// The factory is self-contained: it builds both stubs locally and returns them.
jest.mock("fs/promises", () => {
	// Default: every read resolves to an empty string until a test overrides it.
	const readFile = jest.fn().mockImplementation(async () => "")
	// Default: every stat'd path reports that it is not a directory.
	const stat = jest.fn().mockImplementation(async () => ({ isDirectory: () => false }))
	return { readFile, stat }
})
11+
12+
// Mock fileExistsAtPath
13+
// Stub the project's fs utility module: fileExistsAtPath always resolves to true.
jest.mock("../../../utils/fs", () => {
	const fileExistsAtPath = jest.fn().mockImplementation(async () => true)
	return { fileExistsAtPath }
})
16+
17+
/**
 * Integration tests for markdown handling in parseSourceCodeDefinitionsForFile.
 *
 * Each test feeds markdown text through the mocked fs.readFile, parses a ".md"
 * path, and checks either the listed "start--end | header" entries or that
 * nothing is returned.
 */
describe("Markdown Integration Tests", () => {
	/** Makes the mocked fs.readFile resolve with `contents` on every call. */
	const stubFileContents = (contents: string): void => {
		const readFileMock = fs.readFile as jest.Mock
		readFileMock.mockImplementation(() => Promise.resolve(contents))
	}

	/**
	 * Stubs the file contents, parses `filePath`, and verifies the file was
	 * read as UTF-8 from exactly that path before handing back the result.
	 */
	const parseWithContents = async (filePath: string, contents: string) => {
		stubFileContents(contents)
		const parsed = await parseSourceCodeDefinitionsForFile(filePath)
		expect(fs.readFile).toHaveBeenCalledWith(filePath, "utf8")
		return parsed
	}

	// Start every test with clean call records on all mocks.
	beforeEach(() => {
		jest.clearAllMocks()
	})

	it("should parse markdown files and extract headers", async () => {
		const result = await parseWithContents(
			"test.md",
			"# Main Header\n\nThis is some content under the main header.\nIt spans multiple lines to meet the minimum section length.\n\n## Section 1\n\nThis is content for section 1.\nIt also spans multiple lines.\n\n### Subsection 1.1\n\nThis is a subsection with enough lines\nto meet the minimum section length requirement.\n\n## Section 2\n\nFinal section content.\nWith multiple lines.\n",
		)

		expect(result).toBeDefined()

		// File title line first, then one entry per header with its line range.
		const expectedEntries = [
			"# test.md",
			"0--4 | # Main Header",
			"5--9 | ## Section 1",
			"10--14 | ### Subsection 1.1",
			"15--19 | ## Section 2",
		]
		for (const entry of expectedEntries) {
			expect(result).toContain(entry)
		}
	})

	it("should handle markdown files with no headers", async () => {
		const result = await parseWithContents(
			"no-headers.md",
			"This is just some text.\nNo headers here.\nJust plain text.",
		)

		// Plain text without any header yields no result at all.
		expect(result).toBeUndefined()
	})

	it("should handle markdown files with headers that don't meet minimum section length", async () => {
		const result = await parseWithContents(
			"short-sections.md",
			"# Header 1\nShort section\n\n# Header 2\nAnother short section",
		)

		// Headers are present, but every section is too short to be reported.
		expect(result).toBeUndefined()
	})

	it("should handle markdown files with mixed header styles", async () => {
		const result = await parseWithContents(
			"mixed-headers.md",
			"# ATX Header\nThis is content under an ATX header.\nIt spans multiple lines to meet the minimum section length.\n\nSetext Header\n============\nThis is content under a setext header.\nIt also spans multiple lines to meet the minimum section length.\n",
		)

		expect(result).toBeDefined()

		// Both the "#"-prefixed header and the underlined header must be listed.
		const expectedEntries = ["# mixed-headers.md", "0--3 | # ATX Header", "4--8 | Setext Header"]
		for (const entry of expectedEntries) {
			expect(result).toContain(entry)
		}
	})
})

0 commit comments

Comments
 (0)