@@ -17,7 +17,7 @@ import { t } from "../../../i18n"
17
17
import {
18
18
QDRANT_CODE_BLOCK_NAMESPACE ,
19
19
MAX_FILE_SIZE_BYTES ,
20
- MAX_LIST_FILES_LIMIT ,
20
+ MAX_LIST_FILES_LIMIT_CODE_INDEX ,
21
21
BATCH_SEGMENT_THRESHOLD ,
22
22
MAX_BATCH_RETRIES ,
23
23
INITIAL_RETRY_DELAY_MS ,
@@ -51,13 +51,13 @@ export class DirectoryScanner implements IDirectoryScanner {
51
51
onError ?: ( error : Error ) => void ,
52
52
onBlocksIndexed ?: ( indexedCount : number ) => void ,
53
53
onFileParsed ?: ( fileBlockCount : number ) => void ,
54
- ) : Promise < { codeBlocks : CodeBlock [ ] ; stats : { processed : number ; skipped : number } ; totalBlockCount : number } > {
54
+ ) : Promise < { stats : { processed : number ; skipped : number } ; totalBlockCount : number } > {
55
55
const directoryPath = directory
56
56
// Capture workspace context at scan start
57
57
const scanWorkspace = getWorkspacePathForContext ( directoryPath )
58
58
59
59
// Get all files recursively (handles .gitignore automatically)
60
- const [ allPaths , _ ] = await listFiles ( directoryPath , true , MAX_LIST_FILES_LIMIT )
60
+ const [ allPaths , _ ] = await listFiles ( directoryPath , true , MAX_LIST_FILES_LIMIT_CODE_INDEX )
61
61
62
62
// Filter out directories (marked with trailing '/')
63
63
const filePaths = allPaths . filter ( ( p ) => ! p . endsWith ( "/" ) )
@@ -85,7 +85,6 @@ export class DirectoryScanner implements IDirectoryScanner {
85
85
86
86
// Initialize tracking variables
87
87
const processedFiles = new Set < string > ( )
88
- const codeBlocks : CodeBlock [ ] = [ ]
89
88
let processedCount = 0
90
89
let skippedCount = 0
91
90
@@ -98,7 +97,7 @@ export class DirectoryScanner implements IDirectoryScanner {
98
97
let currentBatchBlocks : CodeBlock [ ] = [ ]
99
98
let currentBatchTexts : string [ ] = [ ]
100
99
let currentBatchFileInfos : { filePath : string ; fileHash : string ; isNew : boolean } [ ] = [ ]
101
- const activeBatchPromises : Promise < void > [ ] = [ ]
100
+ const activeBatchPromises = new Set < Promise < void > > ( )
102
101
103
102
// Initialize block counter
104
103
let totalBlockCount = 0
@@ -125,6 +124,7 @@ export class DirectoryScanner implements IDirectoryScanner {
125
124
126
125
// Check against cache
127
126
const cachedFileHash = this . cacheManager . getHash ( filePath )
127
+ const isNewFile = ! cachedFileHash
128
128
if ( cachedFileHash === currentFileHash ) {
129
129
// File is unchanged
130
130
skippedCount ++
@@ -135,7 +135,6 @@ export class DirectoryScanner implements IDirectoryScanner {
135
135
const blocks = await this . codeParser . parseFile ( filePath , { content, fileHash : currentFileHash } )
136
136
const fileBlockCount = blocks . length
137
137
onFileParsed ?.( fileBlockCount )
138
- codeBlocks . push ( ...blocks )
139
138
processedCount ++
140
139
141
140
// Process embeddings if configured
@@ -146,20 +145,11 @@ export class DirectoryScanner implements IDirectoryScanner {
146
145
const trimmedContent = block . content . trim ( )
147
146
if ( trimmedContent ) {
148
147
const release = await mutex . acquire ( )
149
- totalBlockCount += fileBlockCount
150
148
try {
151
149
currentBatchBlocks . push ( block )
152
150
currentBatchTexts . push ( trimmedContent )
153
151
addedBlocksFromFile = true
154
152
155
- if ( addedBlocksFromFile ) {
156
- currentBatchFileInfos . push ( {
157
- filePath,
158
- fileHash : currentFileHash ,
159
- isNew : ! this . cacheManager . getHash ( filePath ) ,
160
- } )
161
- }
162
-
163
153
// Check if batch threshold is met
164
154
if ( currentBatchBlocks . length >= BATCH_SEGMENT_THRESHOLD ) {
165
155
// Copy current batch data and clear accumulators
@@ -181,13 +171,33 @@ export class DirectoryScanner implements IDirectoryScanner {
181
171
onBlocksIndexed ,
182
172
) ,
183
173
)
184
- activeBatchPromises . push ( batchPromise )
174
+ activeBatchPromises . add ( batchPromise )
175
+
176
+ // Clean up completed promises to prevent memory accumulation
177
+ batchPromise . finally ( ( ) => {
178
+ activeBatchPromises . delete ( batchPromise )
179
+ } )
185
180
}
186
181
} finally {
187
182
release ( )
188
183
}
189
184
}
190
185
}
186
+
187
+ // Add file info once per file (outside the block loop)
188
+ if ( addedBlocksFromFile ) {
189
+ const release = await mutex . acquire ( )
190
+ try {
191
+ totalBlockCount += fileBlockCount
192
+ currentBatchFileInfos . push ( {
193
+ filePath,
194
+ fileHash : currentFileHash ,
195
+ isNew : isNewFile ,
196
+ } )
197
+ } finally {
198
+ release ( )
199
+ }
200
+ }
191
201
} else {
192
202
// Only update hash if not being processed in a batch
193
203
await this . cacheManager . updateHash ( filePath , currentFileHash )
@@ -232,7 +242,12 @@ export class DirectoryScanner implements IDirectoryScanner {
232
242
const batchPromise = batchLimiter ( ( ) =>
233
243
this . processBatch ( batchBlocks , batchTexts , batchFileInfos , scanWorkspace , onError , onBlocksIndexed ) ,
234
244
)
235
- activeBatchPromises . push ( batchPromise )
245
+ activeBatchPromises . add ( batchPromise )
246
+
247
+ // Clean up completed promises to prevent memory accumulation
248
+ batchPromise . finally ( ( ) => {
249
+ activeBatchPromises . delete ( batchPromise )
250
+ } )
236
251
} finally {
237
252
release ( )
238
253
}
@@ -280,7 +295,6 @@ export class DirectoryScanner implements IDirectoryScanner {
280
295
}
281
296
282
297
return {
283
- codeBlocks,
284
298
stats : {
285
299
processed : processedCount ,
286
300
skipped : skippedCount ,
0 commit comments