From b6561cd23b173d31fb2ddb90384129226a59afd0 Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 29 Aug 2025 03:46:16 +0000
Subject: [PATCH 1/2] feat: expose codebase indexing concurrency settings in UI

- Add configurable concurrency settings to codebase-index types
- Update config-manager to read and provide concurrency settings
- Modify DirectoryScanner to accept and use configurable concurrency values
- Add VS Code configuration properties for the new settings
- Add localization strings for user-friendly descriptions
- Add tests for the new configuration functionality

This allows users to optimize indexing performance based on their hardware capabilities.
---
 packages/types/src/codebase-index.ts          |  26 ++++
 src/package.json                              |  21 +++
 src/package.nls.json                          |   5 +-
 .../config-manager-concurrency.spec.ts        | 136 ++++++++++++++++++
 src/services/code-index/config-manager.ts     |  48 ++++++-
 src/services/code-index/interfaces/config.ts  |   3 +
 src/services/code-index/processors/scanner.ts |  24 +++-
 src/services/code-index/service-factory.ts    |   7 +-
 8 files changed, 263 insertions(+), 7 deletions(-)
 create mode 100644 src/services/code-index/__tests__/config-manager-concurrency.spec.ts

diff --git a/packages/types/src/codebase-index.ts b/packages/types/src/codebase-index.ts
index be7778f538..38030717a7 100644
--- a/packages/types/src/codebase-index.ts
+++ b/packages/types/src/codebase-index.ts
@@ -12,6 +12,16 @@ export const CODEBASE_INDEX_DEFAULTS = {
 	MAX_SEARCH_SCORE: 1,
 	DEFAULT_SEARCH_MIN_SCORE: 0.4,
 	SEARCH_SCORE_STEP: 0.05,
+	// Concurrency defaults
+	DEFAULT_PARSING_CONCURRENCY: 10,
+	MIN_PARSING_CONCURRENCY: 1,
+	MAX_PARSING_CONCURRENCY: 50,
+	DEFAULT_MAX_PENDING_BATCHES: 20,
+	MIN_MAX_PENDING_BATCHES: 1,
+	MAX_MAX_PENDING_BATCHES: 100,
+	DEFAULT_BATCH_PROCESSING_CONCURRENCY: 10,
+	MIN_BATCH_PROCESSING_CONCURRENCY: 1,
+	MAX_BATCH_PROCESSING_CONCURRENCY: 50,
 } as const
 
 /**
@@ -36,6 +46,22 @@ export const codebaseIndexConfigSchema = z.object({
 	// OpenAI Compatible specific fields
 	codebaseIndexOpenAiCompatibleBaseUrl: z.string().optional(),
 	codebaseIndexOpenAiCompatibleModelDimension: z.number().optional(),
+	// Concurrency settings
+	codebaseIndexParsingConcurrency: z
+		.number()
+		.min(CODEBASE_INDEX_DEFAULTS.MIN_PARSING_CONCURRENCY)
+		.max(CODEBASE_INDEX_DEFAULTS.MAX_PARSING_CONCURRENCY)
+		.optional(),
+	codebaseIndexMaxPendingBatches: z
+		.number()
+		.min(CODEBASE_INDEX_DEFAULTS.MIN_MAX_PENDING_BATCHES)
+		.max(CODEBASE_INDEX_DEFAULTS.MAX_MAX_PENDING_BATCHES)
+		.optional(),
+	codebaseIndexBatchProcessingConcurrency: z
+		.number()
+		.min(CODEBASE_INDEX_DEFAULTS.MIN_BATCH_PROCESSING_CONCURRENCY)
+		.max(CODEBASE_INDEX_DEFAULTS.MAX_BATCH_PROCESSING_CONCURRENCY)
+		.optional(),
 })
 
 export type CodebaseIndexConfig = z.infer<typeof codebaseIndexConfigSchema>
diff --git a/src/package.json b/src/package.json
index fb236d515e..66f3f1fb50 100644
--- a/src/package.json
+++ b/src/package.json
@@ -400,6 +400,27 @@
 				"type": "boolean",
 				"default": false,
 				"description": "%settings.newTaskRequireTodos.description%"
+			},
+			"roo-cline.codebaseIndexParsingConcurrency": {
+				"type": "number",
+				"default": 10,
+				"minimum": 1,
+				"maximum": 50,
+				"description": "%settings.codebaseIndexParsingConcurrency.description%"
+			},
+			"roo-cline.codebaseIndexMaxPendingBatches": {
+				"type": "number",
+				"default": 20,
+				"minimum": 1,
+				"maximum": 100,
+				"description": "%settings.codebaseIndexMaxPendingBatches.description%"
+			},
+			"roo-cline.codebaseIndexBatchProcessingConcurrency": {
+				"type": "number",
+				"default": 10,
+				"minimum": 1,
+				"maximum": 50,
+				"description": "%settings.codebaseIndexBatchProcessingConcurrency.description%"
 			}
 		}
 	}
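Note: as a quick illustration of how the schema bounds above behave, the sketch below probes a single field with zod's `safeParse`. It assumes `codebaseIndexConfigSchema` is re-exported from `@roo-code/types` (the `CODEBASE_INDEX_DEFAULTS` import used later in this patch suggests the package index re-exports this module); it is not part of the change itself.

```ts
import { codebaseIndexConfigSchema, CODEBASE_INDEX_DEFAULTS } from "@roo-code/types"

// Each new field is an optional number bounded by the MIN_*/MAX_* values above.
const parsingField = codebaseIndexConfigSchema.shape.codebaseIndexParsingConcurrency

console.log(parsingField.safeParse(25).success) // true  - within 1..50
console.log(parsingField.safeParse(100).success) // false - exceeds MAX_PARSING_CONCURRENCY
console.log(parsingField.safeParse(undefined).success) // true  - optional; defaults apply downstream

// The same numbers are available as plain constants for UI widgets or clamping logic.
console.log(CODEBASE_INDEX_DEFAULTS.DEFAULT_PARSING_CONCURRENCY) // 10
```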
"%settings.codebaseIndexBatchProcessingConcurrency.description%" } } } diff --git a/src/package.nls.json b/src/package.nls.json index a18887218a..6d5b0a7699 100644 --- a/src/package.nls.json +++ b/src/package.nls.json @@ -40,5 +40,8 @@ "settings.autoImportSettingsPath.description": "Path to a RooCode configuration file to automatically import on extension startup. Supports absolute paths and paths relative to the home directory (e.g. '~/Documents/roo-code-settings.json'). Leave empty to disable auto-import.", "settings.useAgentRules.description": "Enable loading of AGENTS.md files for agent-specific rules (see https://agent-rules.org/)", "settings.apiRequestTimeout.description": "Maximum time in seconds to wait for API responses (0 = no timeout, 1-3600s, default: 600s). Higher values are recommended for local providers like LM Studio and Ollama that may need more processing time.", - "settings.newTaskRequireTodos.description": "Require todos parameter when creating new tasks with the new_task tool" + "settings.newTaskRequireTodos.description": "Require todos parameter when creating new tasks with the new_task tool", + "settings.codebaseIndexParsingConcurrency.description": "Number of files to parse in parallel during codebase indexing (1-50, default: 10). Increase for faster indexing on powerful machines, decrease for lower-end hardware.", + "settings.codebaseIndexMaxPendingBatches.description": "Maximum number of batches to accumulate before waiting during codebase indexing (1-100, default: 20). Higher values may improve throughput but increase memory usage.", + "settings.codebaseIndexBatchProcessingConcurrency.description": "Number of batches to process for embeddings/upserts in parallel (1-50, default: 10). Adjust based on your system's capabilities and API rate limits." 
diff --git a/src/services/code-index/__tests__/config-manager-concurrency.spec.ts b/src/services/code-index/__tests__/config-manager-concurrency.spec.ts
new file mode 100644
index 0000000000..88636f324d
--- /dev/null
+++ b/src/services/code-index/__tests__/config-manager-concurrency.spec.ts
@@ -0,0 +1,136 @@
+import { describe, it, expect, beforeEach, vi } from "vitest"
+import { CodeIndexConfigManager } from "../config-manager"
+import { ContextProxy } from "../../../core/config/ContextProxy"
+import { CODEBASE_INDEX_DEFAULTS } from "@roo-code/types"
+import { PARSING_CONCURRENCY, MAX_PENDING_BATCHES, BATCH_PROCESSING_CONCURRENCY } from "../constants"
+
+describe("CodeIndexConfigManager - Concurrency Settings", () => {
+	let configManager: CodeIndexConfigManager
+	let mockContextProxy: ContextProxy
+
+	beforeEach(() => {
+		mockContextProxy = {
+			getGlobalState: vi.fn(),
+			getSecret: vi.fn(),
+			refreshSecrets: vi.fn().mockResolvedValue(undefined),
+		} as any
+
+		configManager = new CodeIndexConfigManager(mockContextProxy)
+	})
+
+	describe("Concurrency Configuration", () => {
+		it("should use default values when no concurrency settings are configured", () => {
+			// Setup mock to return empty config
+			vi.mocked(mockContextProxy.getGlobalState).mockReturnValue({
+				codebaseIndexEnabled: true,
+				codebaseIndexQdrantUrl: "http://localhost:6333",
+				codebaseIndexEmbedderProvider: "openai",
+			})
+			vi.mocked(mockContextProxy.getSecret).mockImplementation((key) => {
+				if (key === "codeIndexOpenAiKey") return "test-key"
+				return ""
+			})
+
+			const config = configManager.getConfig()
+
+			expect(config.parsingConcurrency).toBe(PARSING_CONCURRENCY)
+			expect(config.maxPendingBatches).toBe(MAX_PENDING_BATCHES)
+			expect(config.batchProcessingConcurrency).toBe(BATCH_PROCESSING_CONCURRENCY)
+		})
+
+		it("should use configured values when concurrency settings are provided", () => {
+			// Setup mock to return custom concurrency config
+			vi.mocked(mockContextProxy.getGlobalState).mockReturnValue({
+				codebaseIndexEnabled: true,
+				codebaseIndexQdrantUrl: "http://localhost:6333",
+				codebaseIndexEmbedderProvider: "openai",
+				codebaseIndexParsingConcurrency: 25,
+				codebaseIndexMaxPendingBatches: 50,
+				codebaseIndexBatchProcessingConcurrency: 15,
+			})
+			vi.mocked(mockContextProxy.getSecret).mockImplementation((key) => {
+				if (key === "codeIndexOpenAiKey") return "test-key"
+				return ""
+			})
+
+			const config = configManager.getConfig()
+
+			expect(config.parsingConcurrency).toBe(25)
+			expect(config.maxPendingBatches).toBe(50)
+			expect(config.batchProcessingConcurrency).toBe(15)
+		})
+
+		it("should respect minimum and maximum bounds for concurrency settings", () => {
+			// Test that values are within the defined bounds
+			const minParsingConcurrency = CODEBASE_INDEX_DEFAULTS.MIN_PARSING_CONCURRENCY
+			const maxParsingConcurrency = CODEBASE_INDEX_DEFAULTS.MAX_PARSING_CONCURRENCY
+			const minMaxPendingBatches = CODEBASE_INDEX_DEFAULTS.MIN_MAX_PENDING_BATCHES
+			const maxMaxPendingBatches = CODEBASE_INDEX_DEFAULTS.MAX_MAX_PENDING_BATCHES
+			const minBatchProcessingConcurrency = CODEBASE_INDEX_DEFAULTS.MIN_BATCH_PROCESSING_CONCURRENCY
+			const maxBatchProcessingConcurrency = CODEBASE_INDEX_DEFAULTS.MAX_BATCH_PROCESSING_CONCURRENCY
+
+			// Verify bounds are reasonable
+			expect(minParsingConcurrency).toBeGreaterThanOrEqual(1)
+			expect(maxParsingConcurrency).toBeLessThanOrEqual(50)
+			expect(minMaxPendingBatches).toBeGreaterThanOrEqual(1)
+			expect(maxMaxPendingBatches).toBeLessThanOrEqual(100)
+			expect(minBatchProcessingConcurrency).toBeGreaterThanOrEqual(1)
+			expect(maxBatchProcessingConcurrency).toBeLessThanOrEqual(50)
+		})
+
+		it("should use getter methods to retrieve concurrency values", () => {
+			// Setup mock to return custom concurrency config
+			vi.mocked(mockContextProxy.getGlobalState).mockReturnValue({
+				codebaseIndexEnabled: true,
+				codebaseIndexQdrantUrl: "http://localhost:6333",
+				codebaseIndexEmbedderProvider: "openai",
+				codebaseIndexParsingConcurrency: 30,
+				codebaseIndexMaxPendingBatches: 40,
+				codebaseIndexBatchProcessingConcurrency: 20,
+			})
+			vi.mocked(mockContextProxy.getSecret).mockImplementation((key) => {
+				if (key === "codeIndexOpenAiKey") return "test-key"
+				return ""
+			})
+
+			// Force re-initialization with new config
+			const newConfigManager = new CodeIndexConfigManager(mockContextProxy)
+
+			expect(newConfigManager.currentParsingConcurrency).toBe(30)
+			expect(newConfigManager.currentMaxPendingBatches).toBe(40)
+			expect(newConfigManager.currentBatchProcessingConcurrency).toBe(20)
+		})
+
+		it("should not require restart when only concurrency settings change", async () => {
+			// Setup initial config
+			vi.mocked(mockContextProxy.getGlobalState).mockReturnValue({
+				codebaseIndexEnabled: true,
+				codebaseIndexQdrantUrl: "http://localhost:6333",
+				codebaseIndexEmbedderProvider: "openai",
+				codebaseIndexParsingConcurrency: 10,
+			})
+			vi.mocked(mockContextProxy.getSecret).mockImplementation((key) => {
+				if (key === "codeIndexOpenAiKey") return "test-key"
+				return ""
+			})
+
+			// Load initial configuration
+			await configManager.loadConfiguration()
+
+			// Change only concurrency settings
+			vi.mocked(mockContextProxy.getGlobalState).mockReturnValue({
+				codebaseIndexEnabled: true,
+				codebaseIndexQdrantUrl: "http://localhost:6333",
+				codebaseIndexEmbedderProvider: "openai",
+				codebaseIndexParsingConcurrency: 20, // Changed
+			})
+
+			// Load new configuration
+			const result = await configManager.loadConfiguration()
+
+			// Concurrency changes alone should not require restart
+			// (This might need adjustment based on actual implementation requirements)
+			expect(result.requiresRestart).toBe(false)
+		})
+	})
+})
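Note: the five tests above repeat the same `ContextProxy` mock. A hypothetical shared helper (not part of the patch) could keep them terser while preserving the same surface the config manager relies on:

```ts
import { vi } from "vitest"
import type { ContextProxy } from "../../../core/config/ContextProxy"

// Builds the minimal ContextProxy surface these tests exercise.
function createMockContextProxy(globalState: Record<string, unknown>): ContextProxy {
	return {
		getGlobalState: vi.fn().mockReturnValue(globalState),
		getSecret: vi.fn().mockImplementation((key: string) => (key === "codeIndexOpenAiKey" ? "test-key" : "")),
		refreshSecrets: vi.fn().mockResolvedValue(undefined),
	} as unknown as ContextProxy
}
```

Each `it` block would then only state the global-state fields it cares about, e.g. `createMockContextProxy({ codebaseIndexEnabled: true, codebaseIndexQdrantUrl: "http://localhost:6333", codebaseIndexEmbedderProvider: "openai", codebaseIndexParsingConcurrency: 25 })`.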
diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts
index 2c0e8bb5c9..30d6f6aefe 100644
--- a/src/services/code-index/config-manager.ts
+++ b/src/services/code-index/config-manager.ts
@@ -2,8 +2,15 @@ import { ApiHandlerOptions } from "../../shared/api"
 import { ContextProxy } from "../../core/config/ContextProxy"
 import { EmbedderProvider } from "./interfaces/manager"
 import { CodeIndexConfig, PreviousConfigSnapshot } from "./interfaces/config"
-import { DEFAULT_SEARCH_MIN_SCORE, DEFAULT_MAX_SEARCH_RESULTS } from "./constants"
+import {
+	DEFAULT_SEARCH_MIN_SCORE,
+	DEFAULT_MAX_SEARCH_RESULTS,
+	PARSING_CONCURRENCY,
+	MAX_PENDING_BATCHES,
+	BATCH_PROCESSING_CONCURRENCY,
+} from "./constants"
 import { getDefaultModelId, getModelDimension, getModelScoreThreshold } from "../../shared/embeddingModels"
+import { CODEBASE_INDEX_DEFAULTS } from "@roo-code/types"
 
 /**
  * Manages configuration state and validation for the code indexing feature.
@@ -24,6 +31,9 @@ export class CodeIndexConfigManager {
 	private qdrantApiKey?: string
 	private searchMinScore?: number
 	private searchMaxResults?: number
+	private parsingConcurrency?: number
+	private maxPendingBatches?: number
+	private batchProcessingConcurrency?: number
 
 	constructor(private readonly contextProxy: ContextProxy) {
 		// Initialize with current configuration to avoid false restart triggers
@@ -51,6 +61,9 @@ export class CodeIndexConfigManager {
 			codebaseIndexEmbedderModelId: "",
 			codebaseIndexSearchMinScore: undefined,
 			codebaseIndexSearchMaxResults: undefined,
+			codebaseIndexParsingConcurrency: undefined,
+			codebaseIndexMaxPendingBatches: undefined,
+			codebaseIndexBatchProcessingConcurrency: undefined,
 		}
 
 		const {
@@ -61,6 +74,9 @@ export class CodeIndexConfigManager {
 			codebaseIndexEmbedderModelId,
 			codebaseIndexSearchMinScore,
 			codebaseIndexSearchMaxResults,
+			codebaseIndexParsingConcurrency,
+			codebaseIndexMaxPendingBatches,
+			codebaseIndexBatchProcessingConcurrency,
 		} = codebaseIndexConfig
 
 		const openAiKey = this.contextProxy?.getSecret("codeIndexOpenAiKey") ?? ""
@@ -78,6 +94,9 @@ export class CodeIndexConfigManager {
 		this.qdrantApiKey = qdrantApiKey ?? ""
 		this.searchMinScore = codebaseIndexSearchMinScore
 		this.searchMaxResults = codebaseIndexSearchMaxResults
+		this.parsingConcurrency = codebaseIndexParsingConcurrency
+		this.maxPendingBatches = codebaseIndexMaxPendingBatches
+		this.batchProcessingConcurrency = codebaseIndexBatchProcessingConcurrency
 
 		// Validate and set model dimension
 		const rawDimension = codebaseIndexConfig.codebaseIndexEmbedderModelDimension
@@ -399,6 +418,9 @@ export class CodeIndexConfigManager {
 			qdrantApiKey: this.qdrantApiKey,
 			searchMinScore: this.currentSearchMinScore,
 			searchMaxResults: this.currentSearchMaxResults,
+			parsingConcurrency: this.currentParsingConcurrency,
+			maxPendingBatches: this.currentMaxPendingBatches,
+			batchProcessingConcurrency: this.currentBatchProcessingConcurrency,
 		}
 	}
 
@@ -480,4 +502,28 @@ export class CodeIndexConfigManager {
 	public get currentSearchMaxResults(): number {
 		return this.searchMaxResults ?? DEFAULT_MAX_SEARCH_RESULTS
 	}
+
+	/**
+	 * Gets the configured parsing concurrency.
+	 * Returns user setting if configured, otherwise returns default.
+	 */
+	public get currentParsingConcurrency(): number {
+		return this.parsingConcurrency ?? PARSING_CONCURRENCY
+	}
+
+	/**
+	 * Gets the configured maximum pending batches.
+	 * Returns user setting if configured, otherwise returns default.
+	 */
+	public get currentMaxPendingBatches(): number {
+		return this.maxPendingBatches ?? MAX_PENDING_BATCHES
+	}
+
+	/**
+	 * Gets the configured batch processing concurrency.
+	 * Returns user setting if configured, otherwise returns default.
+	 */
+	public get currentBatchProcessingConcurrency(): number {
+		return this.batchProcessingConcurrency ?? BATCH_PROCESSING_CONCURRENCY
+	}
 }
diff --git a/src/services/code-index/interfaces/config.ts b/src/services/code-index/interfaces/config.ts
index f168e26869..1cf680749d 100644
--- a/src/services/code-index/interfaces/config.ts
+++ b/src/services/code-index/interfaces/config.ts
@@ -19,6 +19,9 @@ export interface CodeIndexConfig {
 	qdrantApiKey?: string
 	searchMinScore?: number
 	searchMaxResults?: number
+	parsingConcurrency?: number
+	maxPendingBatches?: number
+	batchProcessingConcurrency?: number
 }
 
 /**
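Note: a minimal sketch of how a consumer sees the resolution chain introduced above — a value from the persisted `codebaseIndexConfig` state wins, otherwise the getters fall back to the constants in `./constants` (presumably 10/20/10, matching `CODEBASE_INDEX_DEFAULTS`, though that file is not part of this diff). The `contextProxy` instance is assumed to already exist.

```ts
const configManager = new CodeIndexConfigManager(contextProxy)
await configManager.loadConfiguration() // re-reads codebaseIndexConfig from global state

// Getter form: `user value ?? constant default`.
console.log(configManager.currentParsingConcurrency) // e.g. 25 if configured, else PARSING_CONCURRENCY
console.log(configManager.currentMaxPendingBatches) // e.g. 50 if configured, else MAX_PENDING_BATCHES
console.log(configManager.currentBatchProcessingConcurrency) // configured value, else BATCH_PROCESSING_CONCURRENCY

// Snapshot form: getConfig() carries the same resolved numbers to the service factory.
const { parsingConcurrency, maxPendingBatches, batchProcessingConcurrency } = configManager.getConfig()
```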
diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts
index 27362b8b74..19ff7ff78e 100644
--- a/src/services/code-index/processors/scanner.ts
+++ b/src/services/code-index/processors/scanner.ts
@@ -30,14 +30,30 @@ import { TelemetryService } from "@roo-code/telemetry"
 import { TelemetryEventName } from "@roo-code/types"
 import { sanitizeErrorMessage } from "../shared/validation-helpers"
 
+export interface DirectoryScannerConfig {
+	parsingConcurrency?: number
+	maxPendingBatches?: number
+	batchProcessingConcurrency?: number
+}
+
 export class DirectoryScanner implements IDirectoryScanner {
+	private readonly parsingConcurrency: number
+	private readonly maxPendingBatches: number
+	private readonly batchProcessingConcurrency: number
+
 	constructor(
 		private readonly embedder: IEmbedder,
 		private readonly qdrantClient: IVectorStore,
 		private readonly codeParser: ICodeParser,
 		private readonly cacheManager: CacheManager,
 		private readonly ignoreInstance: Ignore,
-	) {}
+		config?: DirectoryScannerConfig,
+	) {
+		// Use provided config values or fall back to constants
+		this.parsingConcurrency = config?.parsingConcurrency ?? PARSING_CONCURRENCY
+		this.maxPendingBatches = config?.maxPendingBatches ?? MAX_PENDING_BATCHES
+		this.batchProcessingConcurrency = config?.batchProcessingConcurrency ?? BATCH_PROCESSING_CONCURRENCY
+	}
 
 	/**
 	 * Recursively scans a directory for code blocks in supported files.
@@ -90,8 +106,8 @@ export class DirectoryScanner implements IDirectoryScanner {
 		let skippedCount = 0
 
 		// Initialize parallel processing tools
-		const parseLimiter = pLimit(PARSING_CONCURRENCY) // Concurrency for file parsing
-		const batchLimiter = pLimit(BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing
+		const parseLimiter = pLimit(this.parsingConcurrency) // Concurrency for file parsing
+		const batchLimiter = pLimit(this.batchProcessingConcurrency) // Concurrency for batch processing
 		const mutex = new Mutex()
 
 		// Shared batch accumulators (protected by mutex)
@@ -155,7 +171,7 @@
 				// Check if batch threshold is met
 				if (currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD) {
 					// Wait if we've reached the maximum pending batches
-					while (pendingBatchCount >= MAX_PENDING_BATCHES) {
+					while (pendingBatchCount >= this.maxPendingBatches) {
 						// Wait for at least one batch to complete
 						await Promise.race(activeBatchPromises)
 					}
diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts
index 409c591a6b..b7f048bddf 100644
--- a/src/services/code-index/service-factory.ts
+++ b/src/services/code-index/service-factory.ts
@@ -156,7 +156,12 @@ export class CodeIndexServiceFactory {
 		parser: ICodeParser,
 		ignoreInstance: Ignore,
 	): DirectoryScanner {
-		return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance)
+		const config = this.configManager.getConfig()
+		return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance, {
+			parsingConcurrency: config.parsingConcurrency,
+			maxPendingBatches: config.maxPendingBatches,
+			batchProcessingConcurrency: config.batchProcessingConcurrency,
+		})
 	}
 
 	/**

From 8cc9424d7ccdb1aaa680a77ea67efba52963cf07 Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 29 Aug 2025 03:48:30 +0000
Subject: [PATCH 2/2] fix: update config-manager test to include new concurrency properties

---
 .../code-index/__tests__/config-manager-concurrency.spec.ts | 4 +++-
 src/services/code-index/__tests__/config-manager.spec.ts    | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/services/code-index/__tests__/config-manager-concurrency.spec.ts b/src/services/code-index/__tests__/config-manager-concurrency.spec.ts
index 88636f324d..fe5cde970d 100644
--- a/src/services/code-index/__tests__/config-manager-concurrency.spec.ts
+++ b/src/services/code-index/__tests__/config-manager-concurrency.spec.ts
@@ -38,7 +38,7 @@ describe("CodeIndexConfigManager - Concurrency Settings", () => {
 			expect(config.batchProcessingConcurrency).toBe(BATCH_PROCESSING_CONCURRENCY)
 		})
 
-		it("should use configured values when concurrency settings are provided", () => {
+		it("should use configured values when concurrency settings are provided", async () => {
 			// Setup mock to return custom concurrency config
 			vi.mocked(mockContextProxy.getGlobalState).mockReturnValue({
 				codebaseIndexEnabled: true,
@@ -53,6 +53,8 @@ describe("CodeIndexConfigManager - Concurrency Settings", () => {
 				return ""
 			})
 
+			// Load configuration to pick up the new values
+			await configManager.loadConfiguration()
 			const config = configManager.getConfig()
 
 			expect(config.parsingConcurrency).toBe(25)
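Note: the added `await configManager.loadConfiguration()` above matters because the constructor only snapshots whatever the proxy returns at construction time (see the "Initialize with current configuration to avoid false restart triggers" comment in the first patch); mocks reconfigured afterwards are presumably only picked up once the configuration is reloaded. Roughly:

```ts
const manager = new CodeIndexConfigManager(mockContextProxy) // snapshot taken here, before the mock is set up
vi.mocked(mockContextProxy.getGlobalState).mockReturnValue({ codebaseIndexParsingConcurrency: 25 })

await manager.loadConfiguration() // re-reads global state, picking up the mocked values
const config = manager.getConfig() // config.parsingConcurrency === 25
```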
diff --git a/src/services/code-index/__tests__/config-manager.spec.ts b/src/services/code-index/__tests__/config-manager.spec.ts
index 9fc096ba74..5e9619d2a8 100644
--- a/src/services/code-index/__tests__/config-manager.spec.ts
+++ b/src/services/code-index/__tests__/config-manager.spec.ts
@@ -1290,14 +1290,20 @@ describe("CodeIndexConfigManager", () => {
 				isConfigured: true,
 				embedderProvider: "openai",
 				modelId: "text-embedding-3-large",
+				modelDimension: undefined,
 				openAiOptions: { openAiNativeApiKey: "test-openai-key" },
 				ollamaOptions: { ollamaBaseUrl: undefined },
 				geminiOptions: undefined,
+				mistralOptions: undefined,
+				vercelAiGatewayOptions: undefined,
 				openAiCompatibleOptions: undefined,
 				qdrantUrl: "http://qdrant.local",
 				qdrantApiKey: "test-qdrant-key",
 				searchMinScore: 0.4,
 				searchMaxResults: 50,
+				parsingConcurrency: 10,
+				maxPendingBatches: 20,
+				batchProcessingConcurrency: 10,
 			})
 		})
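Note: the knob that ultimately does the throttling is `p-limit` inside `DirectoryScanner` (first patch). A standalone sketch of that pattern, with a hypothetical file list and parse function standing in for the scanner's real work:

```ts
import pLimit from "p-limit"

const parsingConcurrency = 20 // would come from CodeIndexConfigManager / DirectoryScannerConfig
const parseLimiter = pLimit(parsingConcurrency)

// Placeholder for the scanner's per-file parsing work.
async function parseFile(filePath: string): Promise<void> {
	// ... read and parse the file ...
}

async function parseAll(filePaths: string[]): Promise<void> {
	// At most `parsingConcurrency` parseFile calls run at once; the rest wait in p-limit's queue.
	await Promise.all(filePaths.map((filePath) => parseLimiter(() => parseFile(filePath))))
}
```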