Skip to content

Commit d53e627

Browse files
committed
test: address PR review feedback
- Remove redundant test 'should handle large markdown documentation folders efficiently' that only verified the scanner could iterate over mocked files
- Add test to verify unique point IDs are generated for each block from the same file, ensuring the segmentHash-based ID generation prevents collisions
1 parent e2c14fc commit d53e627

File tree

1 file changed

+52
-24
lines changed

1 file changed

+52
-24
lines changed

src/services/code-index/processors/__tests__/scanner.spec.ts

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -318,36 +318,64 @@ describe("DirectoryScanner", () => {
318318
expect(result.stats.processed).toBe(3)
319319
})
320320

321-
it("should handle large markdown documentation folders efficiently", async () => {
321+
it("should generate unique point IDs for each block from the same file", async () => {
322322
const { listFiles } = await import("../../../glob/list-files")
323+
vi.mocked(listFiles).mockResolvedValue([["test/large-doc.md"], false])
323324

324-
// Simulate a large documentation folder with many markdown files
325-
const markdownFiles = Array.from({ length: 50 }, (_, i) => `docs/section-${i}.md`)
326-
vi.mocked(listFiles).mockResolvedValue([markdownFiles, false])
327-
328-
const mockMarkdownBlock: any = {
329-
file_path: "docs/section-0.md",
330-
content: "# Section Header\nDetailed content...",
331-
start_line: 1,
332-
end_line: 5,
333-
identifier: "Section Header",
334-
type: "markdown_header_h1",
335-
fileHash: "section-hash",
336-
segmentHash: "section-segment-hash",
337-
}
325+
// Mock multiple blocks from the same file with different segmentHash values
326+
const mockBlocks: any[] = [
327+
{
328+
file_path: "test/large-doc.md",
329+
content: "# Introduction\nThis is the intro section...",
330+
start_line: 1,
331+
end_line: 10,
332+
identifier: "Introduction",
333+
type: "markdown_header_h1",
334+
fileHash: "same-file-hash",
335+
segmentHash: "unique-segment-hash-1",
336+
},
337+
{
338+
file_path: "test/large-doc.md",
339+
content: "## Getting Started\nHere's how to begin...",
340+
start_line: 11,
341+
end_line: 20,
342+
identifier: "Getting Started",
343+
type: "markdown_header_h2",
344+
fileHash: "same-file-hash",
345+
segmentHash: "unique-segment-hash-2",
346+
},
347+
{
348+
file_path: "test/large-doc.md",
349+
content: "## Advanced Topics\nFor advanced users...",
350+
start_line: 21,
351+
end_line: 30,
352+
identifier: "Advanced Topics",
353+
type: "markdown_header_h2",
354+
fileHash: "same-file-hash",
355+
segmentHash: "unique-segment-hash-3",
356+
},
357+
]
338358

339-
;(mockCodeParser.parseFile as any).mockResolvedValue([mockMarkdownBlock])
359+
;(mockCodeParser.parseFile as any).mockResolvedValue(mockBlocks)
340360

341-
const result = await scanner.scanDirectory("/test")
361+
await scanner.scanDirectory("/test")
342362

343-
// Verify all markdown files were processed
344-
expect(mockCodeParser.parseFile).toHaveBeenCalledTimes(50)
345-
expect(result.stats.processed).toBe(50)
346-
expect(result.codeBlocks).toHaveLength(50)
363+
// Verify that upsertPoints was called with unique IDs for each block
364+
expect(mockVectorStore.upsertPoints).toHaveBeenCalledTimes(1)
365+
const upsertCall = mockVectorStore.upsertPoints.mock.calls[0]
366+
const points = upsertCall[0]
347367

348-
// Verify embeddings were created for all markdown content
349-
expect(mockEmbedder.createEmbeddings).toHaveBeenCalled()
350-
expect(mockVectorStore.upsertPoints).toHaveBeenCalled()
368+
// Extract the IDs from the points
369+
const pointIds = points.map((point: any) => point.id)
370+
371+
// Verify all IDs are unique
372+
expect(pointIds).toHaveLength(3)
373+
expect(new Set(pointIds).size).toBe(3) // All IDs should be unique
374+
375+
// Verify that each point has the correct payload
376+
expect(points[0].payload.segmentHash).toBe("unique-segment-hash-1")
377+
expect(points[1].payload.segmentHash).toBe("unique-segment-hash-2")
378+
expect(points[2].payload.segmentHash).toBe("unique-segment-hash-3")
351379
})
352380
})
353381
})

0 commit comments

Comments (0)