@@ -17,7 +17,7 @@ import { t } from "../../../i18n"
1717import {
1818 QDRANT_CODE_BLOCK_NAMESPACE ,
1919 MAX_FILE_SIZE_BYTES ,
20- MAX_LIST_FILES_LIMIT ,
20+ MAX_LIST_FILES_LIMIT_CODE_INDEX ,
2121 BATCH_SEGMENT_THRESHOLD ,
2222 MAX_BATCH_RETRIES ,
2323 INITIAL_RETRY_DELAY_MS ,
@@ -51,13 +51,13 @@ export class DirectoryScanner implements IDirectoryScanner {
5151 onError ?: ( error : Error ) => void ,
5252 onBlocksIndexed ?: ( indexedCount : number ) => void ,
5353 onFileParsed ?: ( fileBlockCount : number ) => void ,
54- ) : Promise < { codeBlocks : CodeBlock [ ] ; stats : { processed : number ; skipped : number } ; totalBlockCount : number } > {
54+ ) : Promise < { stats : { processed : number ; skipped : number } ; totalBlockCount : number } > {
5555 const directoryPath = directory
5656 // Capture workspace context at scan start
5757 const scanWorkspace = getWorkspacePathForContext ( directoryPath )
5858
5959 // Get all files recursively (handles .gitignore automatically)
60- const [ allPaths , _ ] = await listFiles ( directoryPath , true , MAX_LIST_FILES_LIMIT )
60+ const [ allPaths , _ ] = await listFiles ( directoryPath , true , MAX_LIST_FILES_LIMIT_CODE_INDEX )
6161
6262 // Filter out directories (marked with trailing '/')
6363 const filePaths = allPaths . filter ( ( p ) => ! p . endsWith ( "/" ) )
@@ -85,7 +85,6 @@ export class DirectoryScanner implements IDirectoryScanner {
8585
8686 // Initialize tracking variables
8787 const processedFiles = new Set < string > ( )
88- const codeBlocks : CodeBlock [ ] = [ ]
8988 let processedCount = 0
9089 let skippedCount = 0
9190
@@ -98,7 +97,7 @@ export class DirectoryScanner implements IDirectoryScanner {
9897 let currentBatchBlocks : CodeBlock [ ] = [ ]
9998 let currentBatchTexts : string [ ] = [ ]
10099 let currentBatchFileInfos : { filePath : string ; fileHash : string ; isNew : boolean } [ ] = [ ]
101- const activeBatchPromises : Promise < void > [ ] = [ ]
100+ const activeBatchPromises = new Set < Promise < void > > ( )
102101
103102 // Initialize block counter
104103 let totalBlockCount = 0
@@ -125,6 +124,7 @@ export class DirectoryScanner implements IDirectoryScanner {
125124
126125 // Check against cache
127126 const cachedFileHash = this . cacheManager . getHash ( filePath )
127+ const isNewFile = ! cachedFileHash
128128 if ( cachedFileHash === currentFileHash ) {
129129 // File is unchanged
130130 skippedCount ++
@@ -135,7 +135,6 @@ export class DirectoryScanner implements IDirectoryScanner {
135135 const blocks = await this . codeParser . parseFile ( filePath , { content, fileHash : currentFileHash } )
136136 const fileBlockCount = blocks . length
137137 onFileParsed ?.( fileBlockCount )
138- codeBlocks . push ( ...blocks )
139138 processedCount ++
140139
141140 // Process embeddings if configured
@@ -146,20 +145,11 @@ export class DirectoryScanner implements IDirectoryScanner {
146145 const trimmedContent = block . content . trim ( )
147146 if ( trimmedContent ) {
148147 const release = await mutex . acquire ( )
149- totalBlockCount += fileBlockCount
150148 try {
151149 currentBatchBlocks . push ( block )
152150 currentBatchTexts . push ( trimmedContent )
153151 addedBlocksFromFile = true
154152
155- if ( addedBlocksFromFile ) {
156- currentBatchFileInfos . push ( {
157- filePath,
158- fileHash : currentFileHash ,
159- isNew : ! this . cacheManager . getHash ( filePath ) ,
160- } )
161- }
162-
163153 // Check if batch threshold is met
164154 if ( currentBatchBlocks . length >= BATCH_SEGMENT_THRESHOLD ) {
165155 // Copy current batch data and clear accumulators
@@ -181,13 +171,33 @@ export class DirectoryScanner implements IDirectoryScanner {
181171 onBlocksIndexed ,
182172 ) ,
183173 )
184- activeBatchPromises . push ( batchPromise )
174+ activeBatchPromises . add ( batchPromise )
175+
176+ // Clean up completed promises to prevent memory accumulation
177+ batchPromise . finally ( ( ) => {
178+ activeBatchPromises . delete ( batchPromise )
179+ } )
185180 }
186181 } finally {
187182 release ( )
188183 }
189184 }
190185 }
186+
187+ // Add file info once per file (outside the block loop)
188+ if ( addedBlocksFromFile ) {
189+ const release = await mutex . acquire ( )
190+ try {
191+ totalBlockCount += fileBlockCount
192+ currentBatchFileInfos . push ( {
193+ filePath,
194+ fileHash : currentFileHash ,
195+ isNew : isNewFile ,
196+ } )
197+ } finally {
198+ release ( )
199+ }
200+ }
191201 } else {
192202 // Only update hash if not being processed in a batch
193203 await this . cacheManager . updateHash ( filePath , currentFileHash )
@@ -232,7 +242,12 @@ export class DirectoryScanner implements IDirectoryScanner {
232242 const batchPromise = batchLimiter ( ( ) =>
233243 this . processBatch ( batchBlocks , batchTexts , batchFileInfos , scanWorkspace , onError , onBlocksIndexed ) ,
234244 )
235- activeBatchPromises . push ( batchPromise )
245+ activeBatchPromises . add ( batchPromise )
246+
247+ // Clean up completed promises to prevent memory accumulation
248+ batchPromise . finally ( ( ) => {
249+ activeBatchPromises . delete ( batchPromise )
250+ } )
236251 } finally {
237252 release ( )
238253 }
@@ -280,7 +295,6 @@ export class DirectoryScanner implements IDirectoryScanner {
280295 }
281296
282297 return {
283- codeBlocks,
284298 stats : {
285299 processed : processedCount ,
286300 skipped : skippedCount ,
0 commit comments