Skip to content

Commit 11e0910

Browse files
committed
feat: add configurable embedding batch size for code indexing
- Add codebaseIndexEmbeddingBatchSize configuration field
- Update DirectoryScanner and FileWatcher to use configurable batch size
- Add UI control in CodeIndexPopover for adjusting batch size (1-100)
- Update config manager to handle the new setting with default value of 60
- Add translation strings for the new setting
- Update tests to include the new embeddingBatchSize field

Fixes #7118
1 parent 44086e4 commit 11e0910

File tree

12 files changed

+69
-7
lines changed

12 files changed

+69
-7
lines changed

src/core/webview/ClineProvider.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,6 +1857,7 @@ export class ClineProvider
18571857
codebaseIndexOpenAiCompatibleBaseUrl: codebaseIndexConfig?.codebaseIndexOpenAiCompatibleBaseUrl,
18581858
codebaseIndexSearchMaxResults: codebaseIndexConfig?.codebaseIndexSearchMaxResults,
18591859
codebaseIndexSearchMinScore: codebaseIndexConfig?.codebaseIndexSearchMinScore,
1860+
codebaseIndexEmbeddingBatchSize: codebaseIndexConfig?.codebaseIndexEmbeddingBatchSize,
18601861
},
18611862
mdmCompliant: this.checkMdmCompliance(),
18621863
profileThresholds: profileThresholds ?? {},
@@ -2048,6 +2049,7 @@ export class ClineProvider
20482049
stateValues.codebaseIndexConfig?.codebaseIndexOpenAiCompatibleBaseUrl,
20492050
codebaseIndexSearchMaxResults: stateValues.codebaseIndexConfig?.codebaseIndexSearchMaxResults,
20502051
codebaseIndexSearchMinScore: stateValues.codebaseIndexConfig?.codebaseIndexSearchMinScore,
2052+
codebaseIndexEmbeddingBatchSize: stateValues.codebaseIndexConfig?.codebaseIndexEmbeddingBatchSize,
20512053
},
20522054
profileThresholds: stateValues.profileThresholds ?? {},
20532055
// Add diagnostic message settings

src/core/webview/webviewMessageHandler.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2045,6 +2045,7 @@ export const webviewMessageHandler = async (
20452045
codebaseIndexOpenAiCompatibleBaseUrl: settings.codebaseIndexOpenAiCompatibleBaseUrl,
20462046
codebaseIndexSearchMaxResults: settings.codebaseIndexSearchMaxResults,
20472047
codebaseIndexSearchMinScore: settings.codebaseIndexSearchMinScore,
2048+
codebaseIndexEmbeddingBatchSize: settings.codebaseIndexEmbeddingBatchSize,
20482049
}
20492050

20502051
// Save global state first

src/services/code-index/__tests__/config-manager.spec.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,16 @@ describe("CodeIndexConfigManager", () => {
104104
isConfigured: false,
105105
embedderProvider: "openai",
106106
modelId: undefined,
107+
modelDimension: undefined,
107108
openAiOptions: { openAiNativeApiKey: "" },
108109
ollamaOptions: { ollamaBaseUrl: "" },
110+
openAiCompatibleOptions: undefined,
111+
geminiOptions: undefined,
112+
mistralOptions: undefined,
109113
qdrantUrl: "http://localhost:6333",
110114
qdrantApiKey: "",
111115
searchMinScore: 0.4,
116+
embeddingBatchSize: 60,
112117
})
113118
expect(result.requiresRestart).toBe(false)
114119
})
@@ -135,11 +140,16 @@ describe("CodeIndexConfigManager", () => {
135140
isConfigured: true,
136141
embedderProvider: "openai",
137142
modelId: "text-embedding-3-large",
143+
modelDimension: undefined,
138144
openAiOptions: { openAiNativeApiKey: "test-openai-key" },
139145
ollamaOptions: { ollamaBaseUrl: "" },
146+
openAiCompatibleOptions: undefined,
147+
geminiOptions: undefined,
148+
mistralOptions: undefined,
140149
qdrantUrl: "http://qdrant.local",
141150
qdrantApiKey: "test-qdrant-key",
142151
searchMinScore: 0.4,
152+
embeddingBatchSize: 60,
143153
})
144154
})
145155

@@ -167,6 +177,10 @@ describe("CodeIndexConfigManager", () => {
167177
expect(result.currentConfig).toEqual({
168178
isConfigured: true,
169179
embedderProvider: "openai-compatible",
180+
embeddingBatchSize: 60,
181+
geminiOptions: undefined,
182+
mistralOptions: undefined,
183+
modelDimension: undefined,
170184
modelId: "text-embedding-3-large",
171185
openAiOptions: { openAiNativeApiKey: "" },
172186
ollamaOptions: { ollamaBaseUrl: "" },
@@ -204,6 +218,9 @@ describe("CodeIndexConfigManager", () => {
204218
expect(result.currentConfig).toEqual({
205219
isConfigured: true,
206220
embedderProvider: "openai-compatible",
221+
embeddingBatchSize: 60,
222+
geminiOptions: undefined,
223+
mistralOptions: undefined,
207224
modelId: "custom-model",
208225
modelDimension: 1024,
209226
openAiOptions: { openAiNativeApiKey: "" },
@@ -242,6 +259,10 @@ describe("CodeIndexConfigManager", () => {
242259
expect(result.currentConfig).toEqual({
243260
isConfigured: true,
244261
embedderProvider: "openai-compatible",
262+
embeddingBatchSize: 60,
263+
geminiOptions: undefined,
264+
mistralOptions: undefined,
265+
modelDimension: undefined,
245266
modelId: "custom-model",
246267
openAiOptions: { openAiNativeApiKey: "" },
247268
ollamaOptions: { ollamaBaseUrl: "" },
@@ -280,6 +301,7 @@ describe("CodeIndexConfigManager", () => {
280301
expect(result.currentConfig).toEqual({
281302
isConfigured: true,
282303
embedderProvider: "openai-compatible",
304+
embeddingBatchSize: 60,
283305
modelId: "custom-model",
284306
modelDimension: undefined, // Invalid dimension is converted to undefined
285307
openAiOptions: { openAiNativeApiKey: "" },
@@ -289,6 +311,7 @@ describe("CodeIndexConfigManager", () => {
289311
apiKey: "test-openai-compatible-key",
290312
},
291313
geminiOptions: undefined,
314+
mistralOptions: undefined,
292315
qdrantUrl: "http://qdrant.local",
293316
qdrantApiKey: "test-qdrant-key",
294317
searchMinScore: 0.4,
@@ -1291,10 +1314,13 @@ describe("CodeIndexConfigManager", () => {
12911314
expect(config).toEqual({
12921315
isConfigured: true,
12931316
embedderProvider: "openai",
1317+
embeddingBatchSize: 60,
1318+
geminiOptions: undefined,
1319+
mistralOptions: undefined,
1320+
modelDimension: undefined,
12941321
modelId: "text-embedding-3-large",
12951322
openAiOptions: { openAiNativeApiKey: "test-openai-key" },
12961323
ollamaOptions: { ollamaBaseUrl: undefined },
1297-
geminiOptions: undefined,
12981324
openAiCompatibleOptions: undefined,
12991325
qdrantUrl: "http://qdrant.local",
13001326
qdrantApiKey: "test-qdrant-key",

src/services/code-index/config-manager.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { ApiHandlerOptions } from "../../shared/api"
22
import { ContextProxy } from "../../core/config/ContextProxy"
33
import { EmbedderProvider } from "./interfaces/manager"
44
import { CodeIndexConfig, PreviousConfigSnapshot } from "./interfaces/config"
5-
import { DEFAULT_SEARCH_MIN_SCORE, DEFAULT_MAX_SEARCH_RESULTS } from "./constants"
5+
import { DEFAULT_SEARCH_MIN_SCORE, DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_BATCH_SEGMENT_THRESHOLD } from "./constants"
66
import { getDefaultModelId, getModelDimension, getModelScoreThreshold } from "../../shared/embeddingModels"
77

88
/**
@@ -23,6 +23,7 @@ export class CodeIndexConfigManager {
2323
private qdrantApiKey?: string
2424
private searchMinScore?: number
2525
private searchMaxResults?: number
26+
private embeddingBatchSize?: number
2627

2728
constructor(private readonly contextProxy: ContextProxy) {
2829
// Initialize with current configuration to avoid false restart triggers
@@ -50,6 +51,7 @@ export class CodeIndexConfigManager {
5051
codebaseIndexEmbedderModelId: "",
5152
codebaseIndexSearchMinScore: undefined,
5253
codebaseIndexSearchMaxResults: undefined,
54+
codebaseIndexEmbeddingBatchSize: undefined,
5355
}
5456

5557
const {
@@ -60,6 +62,7 @@ export class CodeIndexConfigManager {
6062
codebaseIndexEmbedderModelId,
6163
codebaseIndexSearchMinScore,
6264
codebaseIndexSearchMaxResults,
65+
codebaseIndexEmbeddingBatchSize,
6366
} = codebaseIndexConfig
6467

6568
const openAiKey = this.contextProxy?.getSecret("codeIndexOpenAiKey") ?? ""
@@ -76,6 +79,7 @@ export class CodeIndexConfigManager {
7679
this.qdrantApiKey = qdrantApiKey ?? ""
7780
this.searchMinScore = codebaseIndexSearchMinScore
7881
this.searchMaxResults = codebaseIndexSearchMaxResults
82+
this.embeddingBatchSize = codebaseIndexEmbeddingBatchSize
7983

8084
// Validate and set model dimension
8185
const rawDimension = codebaseIndexConfig.codebaseIndexEmbedderModelDimension
@@ -144,6 +148,7 @@ export class CodeIndexConfigManager {
144148
qdrantUrl?: string
145149
qdrantApiKey?: string
146150
searchMinScore?: number
151+
embeddingBatchSize?: number
147152
}
148153
requiresRestart: boolean
149154
}> {
@@ -187,6 +192,7 @@ export class CodeIndexConfigManager {
187192
qdrantUrl: this.qdrantUrl,
188193
qdrantApiKey: this.qdrantApiKey,
189194
searchMinScore: this.currentSearchMinScore,
195+
embeddingBatchSize: this.currentEmbeddingBatchSize,
190196
},
191197
requiresRestart,
192198
}
@@ -379,6 +385,7 @@ export class CodeIndexConfigManager {
379385
qdrantApiKey: this.qdrantApiKey,
380386
searchMinScore: this.currentSearchMinScore,
381387
searchMaxResults: this.currentSearchMaxResults,
388+
embeddingBatchSize: this.currentEmbeddingBatchSize,
382389
}
383390
}
384391

@@ -460,4 +467,12 @@ export class CodeIndexConfigManager {
460467
public get currentSearchMaxResults(): number {
461468
return this.searchMaxResults ?? DEFAULT_MAX_SEARCH_RESULTS
462469
}
470+
471+
/**
472+
* Gets the configured embedding batch size.
473+
* Returns user setting if configured, otherwise returns default.
474+
*/
475+
public get currentEmbeddingBatchSize(): number {
476+
return this.embeddingBatchSize ?? DEFAULT_BATCH_SEGMENT_THRESHOLD
477+
}
463478
}

src/services/code-index/constants/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ export const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB
1616

1717
/**Directory Scanner */
1818
export const MAX_LIST_FILES_LIMIT_CODE_INDEX = 50_000
19-
export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts
19+
export const DEFAULT_BATCH_SEGMENT_THRESHOLD = 60 // Default number of code segments to batch for embeddings/upserts
20+
export const BATCH_SEGMENT_THRESHOLD = DEFAULT_BATCH_SEGMENT_THRESHOLD // Kept for backward compatibility, will be replaced by config value
2021
export const MAX_BATCH_RETRIES = 3
2122
export const INITIAL_RETRY_DELAY_MS = 500
2223
export const PARSING_CONCURRENCY = 10

src/services/code-index/interfaces/config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export interface CodeIndexConfig {
1818
qdrantApiKey?: string
1919
searchMinScore?: number
2020
searchMaxResults?: number
21+
embeddingBatchSize?: number
2122
}
2223

2324
/**

src/services/code-index/processors/file-watcher.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ export class FileWatcher implements IFileWatcher {
7878
private vectorStore?: IVectorStore,
7979
ignoreInstance?: Ignore,
8080
ignoreController?: RooIgnoreController,
81+
private readonly batchSegmentThreshold: number = BATCH_SEGMENT_THRESHOLD,
8182
) {
8283
this.ignoreController = ignoreController || new RooIgnoreController(workspacePath)
8384
if (ignoreInstance) {
@@ -341,8 +342,8 @@ export class FileWatcher implements IFileWatcher {
341342
): Promise<Error | undefined> {
342343
if (pointsForBatchUpsert.length > 0 && this.vectorStore && !overallBatchError) {
343344
try {
344-
for (let i = 0; i < pointsForBatchUpsert.length; i += BATCH_SEGMENT_THRESHOLD) {
345-
const batch = pointsForBatchUpsert.slice(i, i + BATCH_SEGMENT_THRESHOLD)
345+
for (let i = 0; i < pointsForBatchUpsert.length; i += this.batchSegmentThreshold) {
346+
const batch = pointsForBatchUpsert.slice(i, i + this.batchSegmentThreshold)
346347
let retryCount = 0
347348
let upsertError: Error | undefined
348349

src/services/code-index/processors/scanner.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ export class DirectoryScanner implements IDirectoryScanner {
3737
private readonly codeParser: ICodeParser,
3838
private readonly cacheManager: CacheManager,
3939
private readonly ignoreInstance: Ignore,
40+
private readonly batchSegmentThreshold: number = BATCH_SEGMENT_THRESHOLD,
4041
) {}
4142

4243
/**
@@ -153,7 +154,7 @@ export class DirectoryScanner implements IDirectoryScanner {
153154
addedBlocksFromFile = true
154155

155156
// Check if batch threshold is met
156-
if (currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD) {
157+
if (currentBatchBlocks.length >= this.batchSegmentThreshold) {
157158
// Wait if we've reached the maximum pending batches
158159
while (pendingBatchCount >= MAX_PENDING_BATCHES) {
159160
// Wait for at least one batch to complete

src/services/code-index/service-factory.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@ export class CodeIndexServiceFactory {
150150
parser: ICodeParser,
151151
ignoreInstance: Ignore,
152152
): DirectoryScanner {
153-
return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance)
153+
const batchSize = this.configManager.currentEmbeddingBatchSize
154+
return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance, batchSize)
154155
}
155156

156157
/**
@@ -164,6 +165,7 @@ export class CodeIndexServiceFactory {
164165
ignoreInstance: Ignore,
165166
rooIgnoreController?: RooIgnoreController,
166167
): IFileWatcher {
168+
const batchSize = this.configManager.currentEmbeddingBatchSize
167169
return new FileWatcher(
168170
this.workspacePath,
169171
context,
@@ -172,6 +174,7 @@ export class CodeIndexServiceFactory {
172174
vectorStore,
173175
ignoreInstance,
174176
rooIgnoreController,
177+
batchSize,
175178
)
176179
}
177180

src/shared/WebviewMessage.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ export interface WebviewMessage {
265265
codebaseIndexOpenAiCompatibleBaseUrl?: string
266266
codebaseIndexSearchMaxResults?: number
267267
codebaseIndexSearchMinScore?: number
268+
codebaseIndexEmbeddingBatchSize?: number
268269

269270
// Secret settings
270271
codeIndexOpenAiKey?: string

0 commit comments

Comments
 (0)