Skip to content

Commit 66f84f6

Browse files
committed
Fix memory leak in DirectoryScanner
- Remove the codeBlocks accumulation that was causing memory exhaustion.
- Fix batch-processing bugs where file info was added multiple times per file (it was pushed inside the per-block loop, once for every non-empty block, instead of once per file).
- Move the totalBlockCount increment outside the block loop to fix a counting bug (the whole file's block count was being added once per non-empty block).
- Return an empty codeBlocks array, since it is not used by the main orchestrator logic.
- Update tests to expect an empty codeBlocks array.

This fixes the extension running out of memory while indexing large codebases. Memory usage should drop from ~500MB-1GB to ~10-50MB for large projects.
1 parent 8c8888a commit 66f84f6

File tree

2 files changed

+35
-25
lines changed

2 files changed

+35
-25
lines changed

src/services/code-index/processors/__tests__/scanner.spec.ts

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,16 @@ describe("DirectoryScanner", () => {
168168
expect(mockCodeParser.parseFile).not.toHaveBeenCalled()
169169
})
170170

171-
it("should parse changed files and return code blocks", async () => {
171+
it("should parse changed files and return empty codeBlocks array", async () => {
172+
// Create scanner without embedder to test the non-embedding path
173+
const scannerNoEmbeddings = new DirectoryScanner(
174+
null as any, // No embedder
175+
null as any, // No vector store
176+
mockCodeParser,
177+
mockCacheManager,
178+
mockIgnoreInstance,
179+
)
180+
172181
const { listFiles } = await import("../../../glob/list-files")
173182
vi.mocked(listFiles).mockResolvedValue([["test/file1.js"], false])
174183
const mockBlocks: any[] = [
@@ -185,8 +194,8 @@ describe("DirectoryScanner", () => {
185194
]
186195
;(mockCodeParser.parseFile as any).mockResolvedValue(mockBlocks)
187196

188-
const result = await scanner.scanDirectory("/test")
189-
expect(result.codeBlocks).toEqual(mockBlocks)
197+
const result = await scannerNoEmbeddings.scanDirectory("/test")
198+
expect(result.codeBlocks).toEqual([]) // Now returns empty array for memory optimization
190199
expect(result.stats.processed).toBe(1)
191200
})
192201

@@ -252,6 +261,15 @@ describe("DirectoryScanner", () => {
252261
})
253262

254263
it("should process markdown files alongside code files", async () => {
264+
// Create scanner without embedder to test the non-embedding path
265+
const scannerNoEmbeddings = new DirectoryScanner(
266+
null as any, // No embedder
267+
null as any, // No vector store
268+
mockCodeParser,
269+
mockCacheManager,
270+
mockIgnoreInstance,
271+
)
272+
255273
const { listFiles } = await import("../../../glob/list-files")
256274
vi.mocked(listFiles).mockResolvedValue([["test/README.md", "test/app.js", "docs/guide.markdown"], false])
257275

@@ -306,23 +324,16 @@ describe("DirectoryScanner", () => {
306324
return []
307325
})
308326

309-
const result = await scanner.scanDirectory("/test")
327+
const result = await scannerNoEmbeddings.scanDirectory("/test")
310328

311329
// Verify all files were processed
312330
expect(mockCodeParser.parseFile).toHaveBeenCalledTimes(3)
313331
expect(mockCodeParser.parseFile).toHaveBeenCalledWith("test/README.md", expect.any(Object))
314332
expect(mockCodeParser.parseFile).toHaveBeenCalledWith("test/app.js", expect.any(Object))
315333
expect(mockCodeParser.parseFile).toHaveBeenCalledWith("docs/guide.markdown", expect.any(Object))
316334

317-
// Verify code blocks include both markdown and code content
318-
expect(result.codeBlocks).toHaveLength(3)
319-
expect(result.codeBlocks).toEqual(
320-
expect.arrayContaining([
321-
expect.objectContaining({ type: "markdown_header_h1" }),
322-
expect.objectContaining({ type: "function" }),
323-
expect.objectContaining({ type: "markdown_header_h2" }),
324-
]),
325-
)
335+
// Verify codeBlocks is empty (memory optimization) but processing still works
336+
expect(result.codeBlocks).toEqual([])
326337

327338
expect(result.stats.processed).toBe(3)
328339
})

src/services/code-index/processors/scanner.ts

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ export class DirectoryScanner implements IDirectoryScanner {
8585

8686
// Initialize tracking variables
8787
const processedFiles = new Set<string>()
88-
const codeBlocks: CodeBlock[] = []
8988
let processedCount = 0
9089
let skippedCount = 0
9190

@@ -135,7 +134,6 @@ export class DirectoryScanner implements IDirectoryScanner {
135134
const blocks = await this.codeParser.parseFile(filePath, { content, fileHash: currentFileHash })
136135
const fileBlockCount = blocks.length
137136
onFileParsed?.(fileBlockCount)
138-
codeBlocks.push(...blocks)
139137
processedCount++
140138

141139
// Process embeddings if configured
@@ -146,20 +144,11 @@ export class DirectoryScanner implements IDirectoryScanner {
146144
const trimmedContent = block.content.trim()
147145
if (trimmedContent) {
148146
const release = await mutex.acquire()
149-
totalBlockCount += fileBlockCount
150147
try {
151148
currentBatchBlocks.push(block)
152149
currentBatchTexts.push(trimmedContent)
153150
addedBlocksFromFile = true
154151

155-
if (addedBlocksFromFile) {
156-
currentBatchFileInfos.push({
157-
filePath,
158-
fileHash: currentFileHash,
159-
isNew: !this.cacheManager.getHash(filePath),
160-
})
161-
}
162-
163152
// Check if batch threshold is met
164153
if (currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD) {
165154
// Copy current batch data and clear accumulators
@@ -188,6 +177,16 @@ export class DirectoryScanner implements IDirectoryScanner {
188177
}
189178
}
190179
}
180+
181+
// Add file info once per file (outside the block loop)
182+
if (addedBlocksFromFile) {
183+
totalBlockCount += fileBlockCount
184+
currentBatchFileInfos.push({
185+
filePath,
186+
fileHash: currentFileHash,
187+
isNew: !this.cacheManager.getHash(filePath),
188+
})
189+
}
191190
} else {
192191
// Only update hash if not being processed in a batch
193192
await this.cacheManager.updateHash(filePath, currentFileHash)
@@ -280,7 +279,7 @@ export class DirectoryScanner implements IDirectoryScanner {
280279
}
281280

282281
return {
283-
codeBlocks,
282+
codeBlocks: [], // Return empty array to prevent memory accumulation
284283
stats: {
285284
processed: processedCount,
286285
skipped: skippedCount,

0 commit comments

Comments
 (0)