Skip to content

Commit cc369da

Browse files
authored
feat: add batch limiting to code indexer (RooCodeInc#5891)
feat: add batch limiting to code indexer to control memory usage. Changes: (1) add a MAX_PENDING_BATCHES constant (20) to limit the number of concurrent batches; (2) implement a backpressure mechanism that pauses file parsing when the limit is reached; (3) prevent memory overflow when indexing large codebases.
1 parent 020c233 commit cc369da

File tree

2 files changed

+17
-0
lines changed

2 files changed

+17
-0
lines changed

src/services/code-index/constants/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch fo
2020
export const MAX_BATCH_RETRIES = 3
2121
export const INITIAL_RETRY_DELAY_MS = 500
2222
export const PARSING_CONCURRENCY = 10
23+
export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate before waiting
2324

2425
/**OpenAI Embedder */
2526
export const MAX_BATCH_TOKENS = 100000

src/services/code-index/processors/scanner.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
INITIAL_RETRY_DELAY_MS,
2424
PARSING_CONCURRENCY,
2525
BATCH_PROCESSING_CONCURRENCY,
26+
MAX_PENDING_BATCHES,
2627
} from "../constants"
2728
import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
2829
import { TelemetryService } from "@roo-code/telemetry"
@@ -98,6 +99,7 @@ export class DirectoryScanner implements IDirectoryScanner {
9899
let currentBatchTexts: string[] = []
99100
let currentBatchFileInfos: { filePath: string; fileHash: string; isNew: boolean }[] = []
100101
const activeBatchPromises = new Set<Promise<void>>()
102+
let pendingBatchCount = 0
101103

102104
// Initialize block counter
103105
let totalBlockCount = 0
@@ -152,6 +154,12 @@ export class DirectoryScanner implements IDirectoryScanner {
152154

153155
// Check if batch threshold is met
154156
if (currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD) {
157+
// Wait if we've reached the maximum pending batches
158+
while (pendingBatchCount >= MAX_PENDING_BATCHES) {
159+
// Wait for at least one batch to complete
160+
await Promise.race(activeBatchPromises)
161+
}
162+
155163
// Copy current batch data and clear accumulators
156164
const batchBlocks = [...currentBatchBlocks]
157165
const batchTexts = [...currentBatchTexts]
@@ -160,6 +168,9 @@ export class DirectoryScanner implements IDirectoryScanner {
160168
currentBatchTexts = []
161169
currentBatchFileInfos = []
162170

171+
// Increment pending batch count
172+
pendingBatchCount++
173+
163174
// Queue batch processing
164175
const batchPromise = batchLimiter(() =>
165176
this.processBatch(
@@ -176,6 +187,7 @@ export class DirectoryScanner implements IDirectoryScanner {
176187
// Clean up completed promises to prevent memory accumulation
177188
batchPromise.finally(() => {
178189
activeBatchPromises.delete(batchPromise)
190+
pendingBatchCount--
179191
})
180192
}
181193
} finally {
@@ -238,6 +250,9 @@ export class DirectoryScanner implements IDirectoryScanner {
238250
currentBatchTexts = []
239251
currentBatchFileInfos = []
240252

253+
// Increment pending batch count for final batch
254+
pendingBatchCount++
255+
241256
// Queue final batch processing
242257
const batchPromise = batchLimiter(() =>
243258
this.processBatch(batchBlocks, batchTexts, batchFileInfos, scanWorkspace, onError, onBlocksIndexed),
@@ -247,6 +262,7 @@ export class DirectoryScanner implements IDirectoryScanner {
247262
// Clean up completed promises to prevent memory accumulation
248263
batchPromise.finally(() => {
249264
activeBatchPromises.delete(batchPromise)
265+
pendingBatchCount--
250266
})
251267
} finally {
252268
release()

0 commit comments

Comments
 (0)