Skip to content

Commit da98690

Browse files
committed
feat: add configurable embedding batch size setting
- Added new VSCode setting `roo-cline.codeIndex.embeddingBatchSize`
- Default value is 400, configurable range 1-2048
- Updated DirectoryScanner and FileWatcher to use the configurable batch size
- Updated service factory to pass the batch size to processors
- Maintains backward compatibility with the default value
- Fixes #7356: allows users to configure the batch size based on API provider limits
1 parent 0f6079a commit da98690

File tree

4 files changed

+71
-5
lines changed

4 files changed

+71
-5
lines changed

src/package.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,13 @@
400400
"type": "boolean",
401401
"default": false,
402402
"description": "%settings.newTaskRequireTodos.description%"
403+
},
404+
"roo-cline.codeIndex.embeddingBatchSize": {
405+
"type": "number",
406+
"default": 400,
407+
"minimum": 1,
408+
"maximum": 2048,
409+
"description": "The batch size for embedding operations during code indexing. Adjust this based on your API provider's limits. Default is 400."
403410
}
404411
}
405412
}

src/services/code-index/processors/file-watcher.ts

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
2626
import { TelemetryService } from "@roo-code/telemetry"
2727
import { TelemetryEventName } from "@roo-code/types"
2828
import { sanitizeErrorMessage } from "../shared/validation-helpers"
29+
import { Package } from "../../../shared/package"
2930

3031
/**
3132
* Implementation of the file watcher interface
@@ -38,6 +39,7 @@ export class FileWatcher implements IFileWatcher {
3839
private batchProcessDebounceTimer?: NodeJS.Timeout
3940
private readonly BATCH_DEBOUNCE_DELAY_MS = 500
4041
private readonly FILE_PROCESSING_CONCURRENCY_LIMIT = 10
42+
private readonly batchSegmentThreshold: number
4143

4244
private readonly _onDidStartBatchProcessing = new vscode.EventEmitter<string[]>()
4345
private readonly _onBatchProgressUpdate = new vscode.EventEmitter<{
@@ -78,11 +80,26 @@ export class FileWatcher implements IFileWatcher {
7880
private vectorStore?: IVectorStore,
7981
ignoreInstance?: Ignore,
8082
ignoreController?: RooIgnoreController,
83+
batchSegmentThreshold?: number,
8184
) {
8285
this.ignoreController = ignoreController || new RooIgnoreController(workspacePath)
8386
if (ignoreInstance) {
8487
this.ignoreInstance = ignoreInstance
8588
}
89+
// Get the configurable batch size from VSCode settings, fallback to default
90+
// If not provided in constructor, try to get from VSCode settings
91+
if (batchSegmentThreshold !== undefined) {
92+
this.batchSegmentThreshold = batchSegmentThreshold
93+
} else {
94+
try {
95+
this.batchSegmentThreshold = vscode.workspace
96+
.getConfiguration(Package.name)
97+
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
98+
} catch {
99+
// In test environment, vscode.workspace might not be available
100+
this.batchSegmentThreshold = BATCH_SEGMENT_THRESHOLD
101+
}
102+
}
86103
}
87104

88105
/**
@@ -341,8 +358,8 @@ export class FileWatcher implements IFileWatcher {
341358
): Promise<Error | undefined> {
342359
if (pointsForBatchUpsert.length > 0 && this.vectorStore && !overallBatchError) {
343360
try {
344-
for (let i = 0; i < pointsForBatchUpsert.length; i += BATCH_SEGMENT_THRESHOLD) {
345-
const batch = pointsForBatchUpsert.slice(i, i + BATCH_SEGMENT_THRESHOLD)
361+
for (let i = 0; i < pointsForBatchUpsert.length; i += this.batchSegmentThreshold) {
362+
const batch = pointsForBatchUpsert.slice(i, i + this.batchSegmentThreshold)
346363
let retryCount = 0
347364
let upsertError: Error | undefined
348365

src/services/code-index/processors/scanner.ts

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,34 @@ import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
2929
import { TelemetryService } from "@roo-code/telemetry"
3030
import { TelemetryEventName } from "@roo-code/types"
3131
import { sanitizeErrorMessage } from "../shared/validation-helpers"
32+
import { Package } from "../../../shared/package"
3233

3334
export class DirectoryScanner implements IDirectoryScanner {
35+
private readonly batchSegmentThreshold: number
36+
3437
constructor(
3538
private readonly embedder: IEmbedder,
3639
private readonly qdrantClient: IVectorStore,
3740
private readonly codeParser: ICodeParser,
3841
private readonly cacheManager: CacheManager,
3942
private readonly ignoreInstance: Ignore,
40-
) {}
43+
batchSegmentThreshold?: number,
44+
) {
45+
// Get the configurable batch size from VSCode settings, fallback to default
46+
// If not provided in constructor, try to get from VSCode settings
47+
if (batchSegmentThreshold !== undefined) {
48+
this.batchSegmentThreshold = batchSegmentThreshold
49+
} else {
50+
try {
51+
this.batchSegmentThreshold = vscode.workspace
52+
.getConfiguration(Package.name)
53+
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
54+
} catch {
55+
// In test environment, vscode.workspace might not be available
56+
this.batchSegmentThreshold = BATCH_SEGMENT_THRESHOLD
57+
}
58+
}
59+
}
4160

4261
/**
4362
* Recursively scans a directory for code blocks in supported files.
@@ -153,7 +172,7 @@ export class DirectoryScanner implements IDirectoryScanner {
153172
addedBlocksFromFile = true
154173

155174
// Check if batch threshold is met
156-
if (currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD) {
175+
if (currentBatchBlocks.length >= this.batchSegmentThreshold) {
157176
// Wait if we've reached the maximum pending batches
158177
while (pendingBatchCount >= MAX_PENDING_BATCHES) {
159178
// Wait for at least one batch to complete

src/services/code-index/service-factory.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import { Ignore } from "ignore"
1616
import { t } from "../../i18n"
1717
import { TelemetryService } from "@roo-code/telemetry"
1818
import { TelemetryEventName } from "@roo-code/types"
19+
import { Package } from "../../shared/package"
20+
import { BATCH_SEGMENT_THRESHOLD } from "./constants"
1921

2022
/**
2123
* Factory class responsible for creating and configuring code indexing service dependencies.
@@ -156,7 +158,17 @@ export class CodeIndexServiceFactory {
156158
parser: ICodeParser,
157159
ignoreInstance: Ignore,
158160
): DirectoryScanner {
159-
return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance)
161+
// Get the configurable batch size from VSCode settings
162+
let batchSize: number | undefined
163+
try {
164+
batchSize = vscode.workspace
165+
.getConfiguration(Package.name)
166+
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
167+
} catch {
168+
// In test environment, vscode.workspace might not be available
169+
batchSize = undefined
170+
}
171+
return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance, batchSize)
160172
}
161173

162174
/**
@@ -170,6 +182,16 @@ export class CodeIndexServiceFactory {
170182
ignoreInstance: Ignore,
171183
rooIgnoreController?: RooIgnoreController,
172184
): IFileWatcher {
185+
// Get the configurable batch size from VSCode settings
186+
let batchSize: number | undefined
187+
try {
188+
batchSize = vscode.workspace
189+
.getConfiguration(Package.name)
190+
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
191+
} catch {
192+
// In test environment, vscode.workspace might not be available
193+
batchSize = undefined
194+
}
173195
return new FileWatcher(
174196
this.workspacePath,
175197
context,
@@ -178,6 +200,7 @@ export class CodeIndexServiceFactory {
178200
vectorStore,
179201
ignoreInstance,
180202
rooIgnoreController,
203+
batchSize,
181204
)
182205
}
183206

0 commit comments

Comments
 (0)