diff --git a/src/package.json b/src/package.json
index 4217f1b8ef48..ca244079be07 100644
--- a/src/package.json
+++ b/src/package.json
@@ -429,6 +429,39 @@
 					"minimum": 1,
 					"maximum": 200,
 					"description": "%settings.codeIndex.embeddingBatchSize.description%"
+				},
+				"roo-cline.codeIndex.parsingConcurrency": {
+					"type": "number",
+					"default": 10,
+					"minimum": 1,
+					"maximum": 50,
+					"description": "%settings.codeIndex.parsingConcurrency.description%"
+				},
+				"roo-cline.codeIndex.batchProcessingConcurrency": {
+					"type": "number",
+					"default": 10,
+					"minimum": 1,
+					"maximum": 50,
+					"description": "%settings.codeIndex.batchProcessingConcurrency.description%"
+				},
+				"roo-cline.codeIndex.maxPendingBatches": {
+					"type": "number",
+					"default": 20,
+					"minimum": 1,
+					"maximum": 100,
+					"description": "%settings.codeIndex.maxPendingBatches.description%"
+				},
+				"roo-cline.codeIndex.lowResourceMode": {
+					"type": "boolean",
+					"default": false,
+					"description": "%settings.codeIndex.lowResourceMode.description%"
+				},
+				"roo-cline.codeIndex.fileProcessingConcurrency": {
+					"type": "number",
+					"default": 10,
+					"minimum": 1,
+					"maximum": 50,
+					"description": "%settings.codeIndex.fileProcessingConcurrency.description%"
 				}
 			}
 		}
diff --git a/src/package.nls.json b/src/package.nls.json
index 1db69777ac17..b519c7cf81c7 100644
--- a/src/package.nls.json
+++ b/src/package.nls.json
@@ -42,5 +42,10 @@
 	"settings.useAgentRules.description": "Enable loading of AGENTS.md files for agent-specific rules (see https://agent-rules.org/)",
 	"settings.apiRequestTimeout.description": "Maximum time in seconds to wait for API responses (0 = no timeout, 1-3600s, default: 600s). Higher values are recommended for local providers like LM Studio and Ollama that may need more processing time.",
 	"settings.newTaskRequireTodos.description": "Require todos parameter when creating new tasks with the new_task tool",
-	"settings.codeIndex.embeddingBatchSize.description": "The batch size for embedding operations during code indexing. Adjust this based on your API provider's limits. Default is 60."
+	"settings.codeIndex.embeddingBatchSize.description": "The batch size for embedding operations during code indexing. Adjust this based on your API provider's limits. Default is 60.",
+	"settings.codeIndex.parsingConcurrency.description": "Maximum number of files to parse concurrently during code indexing. Lower values reduce memory and CPU usage. Default is 10.",
+	"settings.codeIndex.batchProcessingConcurrency.description": "Maximum number of embedding batches to process concurrently. Lower values reduce API load and memory usage. Default is 10.",
+	"settings.codeIndex.maxPendingBatches.description": "Maximum number of pending batches before throttling. Lower values reduce memory usage. Default is 20.",
+	"settings.codeIndex.lowResourceMode.description": "Enable low resource mode for better performance on modest hardware. This reduces concurrency, batch sizes, and memory usage.",
+	"settings.codeIndex.fileProcessingConcurrency.description": "Maximum number of files to process concurrently when watching for file changes. Lower values reduce CPU and memory usage. Default is 10."
 }
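For reference, the keys contributed above are set from a user's settings.json. A minimal illustration (the specific values below are arbitrary; the ranges and defaults are the ones declared in package.json above):

```jsonc
// settings.json (illustrative values, not defaults)
{
	// Single switch: when true, the indexer falls back to the LOW_RESOURCE_* defaults
	// introduced in constants/index.ts below instead of the normal ones.
	"roo-cline.codeIndex.lowResourceMode": true,

	// Or tune individual knobs explicitly (explicit settings are read in both modes).
	"roo-cline.codeIndex.parsingConcurrency": 4, // 1-50, default 10
	"roo-cline.codeIndex.batchProcessingConcurrency": 4, // 1-50, default 10
	"roo-cline.codeIndex.maxPendingBatches": 10, // 1-100, default 20
	"roo-cline.codeIndex.fileProcessingConcurrency": 4 // 1-50, default 10
}
```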
diff --git a/src/services/code-index/constants/index.ts b/src/services/code-index/constants/index.ts
index 6f0e0fe7e627..91fb7a8e53ce 100644
--- a/src/services/code-index/constants/index.ts
+++ b/src/services/code-index/constants/index.ts
@@ -16,16 +16,24 @@ export const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB
 
 /**Directory Scanner */
 export const MAX_LIST_FILES_LIMIT_CODE_INDEX = 50_000
-export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts
+export const BATCH_SEGMENT_THRESHOLD = 60 // Default number of code segments to batch for embeddings/upserts
 export const MAX_BATCH_RETRIES = 3
 export const INITIAL_RETRY_DELAY_MS = 500
-export const PARSING_CONCURRENCY = 10
-export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate before waiting
+export const PARSING_CONCURRENCY = 10 // Default number of files to parse concurrently
+export const MAX_PENDING_BATCHES = 20 // Default maximum number of batches to accumulate before waiting
+export const BATCH_PROCESSING_CONCURRENCY = 10 // Default number of batches to process concurrently
+export const FILE_PROCESSING_CONCURRENCY = 10 // Default number of files to process concurrently in file watcher
+
+/**Low Resource Mode Defaults */
+export const LOW_RESOURCE_PARSING_CONCURRENCY = 2 // Reduced concurrent file parsing for low-end hardware
+export const LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY = 2 // Reduced concurrent batch processing
+export const LOW_RESOURCE_MAX_PENDING_BATCHES = 5 // Reduced pending batches
+export const LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD = 20 // Smaller batch size for low-end hardware
+export const LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY = 2 // Reduced file processing concurrency for low-end hardware
 
 /**OpenAI Embedder */
 export const MAX_BATCH_TOKENS = 100000
 export const MAX_ITEM_TOKENS = 8191
-export const BATCH_PROCESSING_CONCURRENCY = 10
 
 /**Gemini Embedder */
 export const GEMINI_MAX_ITEM_TOKENS = 2048
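The two groups of constants above are alternative fallback sets, not separate code paths: each consumer below picks one group based on the lowResourceMode setting and passes it as the default to config.get(). A compact way to read the mapping; this helper is illustrative only, the diff inlines the equivalent branching in each constructor and in the service factory:

```typescript
import {
	BATCH_SEGMENT_THRESHOLD,
	PARSING_CONCURRENCY,
	BATCH_PROCESSING_CONCURRENCY,
	MAX_PENDING_BATCHES,
	FILE_PROCESSING_CONCURRENCY,
	LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD,
	LOW_RESOURCE_PARSING_CONCURRENCY,
	LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
	LOW_RESOURCE_MAX_PENDING_BATCHES,
	LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY,
} from "./constants" // i.e. src/services/code-index/constants

// Fallback defaults per mode, taken from the constants above (hypothetical helper).
function indexingDefaults(lowResourceMode: boolean) {
	return lowResourceMode
		? {
				batchSegmentThreshold: LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD, // 20 instead of 60
				parsingConcurrency: LOW_RESOURCE_PARSING_CONCURRENCY, // 2 instead of 10
				batchProcessingConcurrency: LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY, // 2 instead of 10
				maxPendingBatches: LOW_RESOURCE_MAX_PENDING_BATCHES, // 5 instead of 20
				fileProcessingConcurrency: LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY, // 2 instead of 10
			}
		: {
				batchSegmentThreshold: BATCH_SEGMENT_THRESHOLD,
				parsingConcurrency: PARSING_CONCURRENCY,
				batchProcessingConcurrency: BATCH_PROCESSING_CONCURRENCY,
				maxPendingBatches: MAX_PENDING_BATCHES,
				fileProcessingConcurrency: FILE_PROCESSING_CONCURRENCY,
			}
}
```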
diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts
index 1e5ebcbcebc7..c33c0409f802 100644
--- a/src/services/code-index/processors/file-watcher.ts
+++ b/src/services/code-index/processors/file-watcher.ts
@@ -5,6 +5,9 @@ import {
 	BATCH_SEGMENT_THRESHOLD,
 	MAX_BATCH_RETRIES,
 	INITIAL_RETRY_DELAY_MS,
+	LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD,
+	FILE_PROCESSING_CONCURRENCY,
+	LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY,
 } from "../constants"
 import { createHash } from "crypto"
 import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController"
@@ -38,7 +41,7 @@ export class FileWatcher implements IFileWatcher {
 	private accumulatedEvents: Map = new Map()
 	private batchProcessDebounceTimer?: NodeJS.Timeout
 	private readonly BATCH_DEBOUNCE_DELAY_MS = 500
-	private readonly FILE_PROCESSING_CONCURRENCY_LIMIT = 10
+	private readonly FILE_PROCESSING_CONCURRENCY_LIMIT: number
 	private readonly batchSegmentThreshold: number
 
 	private readonly _onDidStartBatchProcessing = new vscode.EventEmitter()
@@ -86,19 +89,33 @@ export class FileWatcher implements IFileWatcher {
 		if (ignoreInstance) {
 			this.ignoreInstance = ignoreInstance
 		}
-		// Get the configurable batch size from VSCode settings, fallback to default
-		// If not provided in constructor, try to get from VSCode settings
-		if (batchSegmentThreshold !== undefined) {
-			this.batchSegmentThreshold = batchSegmentThreshold
-		} else {
-			try {
-				this.batchSegmentThreshold = vscode.workspace
-					.getConfiguration(Package.name)
-					.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
-			} catch {
-				// In test environment, vscode.workspace might not be available
-				this.batchSegmentThreshold = BATCH_SEGMENT_THRESHOLD
+		// Get configurable settings from VSCode
+		try {
+			const config = vscode.workspace.getConfiguration(Package.name)
+			const isLowResourceMode = config.get("codeIndex.lowResourceMode", false)
+
+			if (isLowResourceMode) {
+				this.batchSegmentThreshold =
+					batchSegmentThreshold ??
+					config.get("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
+				// Use configurable file processing concurrency with low resource default
+				this.FILE_PROCESSING_CONCURRENCY_LIMIT = config.get(
+					"codeIndex.fileProcessingConcurrency",
+					LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY,
+				)
+			} else {
+				this.batchSegmentThreshold =
+					batchSegmentThreshold ?? config.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
+				// Use configurable file processing concurrency with normal default
+				this.FILE_PROCESSING_CONCURRENCY_LIMIT = config.get(
+					"codeIndex.fileProcessingConcurrency",
+					FILE_PROCESSING_CONCURRENCY,
+				)
 			}
+		} catch {
+			// In test environment, vscode.workspace might not be available
+			this.batchSegmentThreshold = batchSegmentThreshold ?? BATCH_SEGMENT_THRESHOLD
+			this.FILE_PROCESSING_CONCURRENCY_LIMIT = FILE_PROCESSING_CONCURRENCY
 		}
 	}
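Both the file watcher above and the scanner below resolve each knob the same way: an explicit constructor argument takes precedence, otherwise the workspace setting is read with a mode-dependent fallback, and if vscode.workspace is unavailable (tests) the plain defaults are used. A minimal sketch of that precedence, with illustrative names that are not part of the diff:

```typescript
import * as vscode from "vscode"

// Sketch of the `explicit ?? config.get(key, fallback)` pattern used by both constructors.
function resolveNumberSetting(
	config: vscode.WorkspaceConfiguration,
	key: string, // e.g. "codeIndex.parsingConcurrency"
	explicitValue: number | undefined, // optional constructor argument
	fallback: number, // normal or LOW_RESOURCE_* constant, depending on lowResourceMode
): number {
	// An argument passed by the caller always wins; otherwise read the setting,
	// using the mode-dependent constant as the fallback value.
	return explicitValue ?? config.get<number>(key, fallback)
}
```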
diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts
index 92a7d77c2729..4c902138cdd5 100644
--- a/src/services/code-index/processors/scanner.ts
+++ b/src/services/code-index/processors/scanner.ts
@@ -24,6 +24,10 @@ import {
 	PARSING_CONCURRENCY,
 	BATCH_PROCESSING_CONCURRENCY,
 	MAX_PENDING_BATCHES,
+	LOW_RESOURCE_PARSING_CONCURRENCY,
+	LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
+	LOW_RESOURCE_MAX_PENDING_BATCHES,
+	LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD,
 } from "../constants"
 import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
 import { TelemetryService } from "@roo-code/telemetry"
@@ -33,6 +37,9 @@ import { Package } from "../../../shared/package"
 
 export class DirectoryScanner implements IDirectoryScanner {
 	private readonly batchSegmentThreshold: number
+	private readonly parsingConcurrency: number
+	private readonly batchProcessingConcurrency: number
+	private readonly maxPendingBatches: number
 
 	constructor(
 		private readonly embedder: IEmbedder,
@@ -41,20 +48,49 @@
 		private readonly cacheManager: CacheManager,
 		private readonly ignoreInstance: Ignore,
 		batchSegmentThreshold?: number,
+		parsingConcurrency?: number,
+		batchProcessingConcurrency?: number,
+		maxPendingBatches?: number,
 	) {
-		// Get the configurable batch size from VSCode settings, fallback to default
-		// If not provided in constructor, try to get from VSCode settings
-		if (batchSegmentThreshold !== undefined) {
-			this.batchSegmentThreshold = batchSegmentThreshold
-		} else {
-			try {
-				this.batchSegmentThreshold = vscode.workspace
-					.getConfiguration(Package.name)
-					.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
-			} catch {
-				// In test environment, vscode.workspace might not be available
-				this.batchSegmentThreshold = BATCH_SEGMENT_THRESHOLD
+		// Get configuration from VSCode settings
+		try {
+			const config = vscode.workspace.getConfiguration(Package.name)
+			const isLowResourceMode = config.get("codeIndex.lowResourceMode", false)
+
+			// Apply low resource mode defaults if enabled
+			if (isLowResourceMode) {
+				this.batchSegmentThreshold =
+					batchSegmentThreshold ??
+					config.get("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
+				this.parsingConcurrency =
+					parsingConcurrency ??
+					config.get("codeIndex.parsingConcurrency", LOW_RESOURCE_PARSING_CONCURRENCY)
+				this.batchProcessingConcurrency =
+					batchProcessingConcurrency ??
+					config.get(
+						"codeIndex.batchProcessingConcurrency",
+						LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
+					)
+				this.maxPendingBatches =
+					maxPendingBatches ??
+					config.get("codeIndex.maxPendingBatches", LOW_RESOURCE_MAX_PENDING_BATCHES)
+			} else {
+				this.batchSegmentThreshold =
+					batchSegmentThreshold ?? config.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
+				this.parsingConcurrency =
+					parsingConcurrency ?? config.get("codeIndex.parsingConcurrency", PARSING_CONCURRENCY)
+				this.batchProcessingConcurrency =
+					batchProcessingConcurrency ??
+					config.get("codeIndex.batchProcessingConcurrency", BATCH_PROCESSING_CONCURRENCY)
+				this.maxPendingBatches =
+					maxPendingBatches ?? config.get("codeIndex.maxPendingBatches", MAX_PENDING_BATCHES)
 			}
+		} catch {
+			// In test environment, vscode.workspace might not be available
+			this.batchSegmentThreshold = batchSegmentThreshold ?? BATCH_SEGMENT_THRESHOLD
+			this.parsingConcurrency = parsingConcurrency ?? PARSING_CONCURRENCY
+			this.batchProcessingConcurrency = batchProcessingConcurrency ?? BATCH_PROCESSING_CONCURRENCY
+			this.maxPendingBatches = maxPendingBatches ?? MAX_PENDING_BATCHES
 		}
 	}
@@ -108,9 +144,9 @@ export class DirectoryScanner implements IDirectoryScanner {
 		let processedCount = 0
 		let skippedCount = 0
 
-		// Initialize parallel processing tools
-		const parseLimiter = pLimit(PARSING_CONCURRENCY) // Concurrency for file parsing
-		const batchLimiter = pLimit(BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing
+		// Initialize parallel processing tools with configurable concurrency
+		const parseLimiter = pLimit(this.parsingConcurrency) // Concurrency for file parsing
+		const batchLimiter = pLimit(this.batchProcessingConcurrency) // Concurrency for batch processing
 		const mutex = new Mutex()
 
 		// Shared batch accumulators (protected by mutex)
@@ -174,7 +210,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 				// Check if batch threshold is met
 				if (currentBatchBlocks.length >= this.batchSegmentThreshold) {
 					// Wait if we've reached the maximum pending batches
-					while (pendingBatchCount >= MAX_PENDING_BATCHES) {
+					while (pendingBatchCount >= this.maxPendingBatches) {
 						// Wait for at least one batch to complete
 						await Promise.race(activeBatchPromises)
 					}
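The scanner changes above thread the new values into an existing producer/consumer pipeline: the p-limit pools bound how many files are parsed and how many batches are processed at once, and the maxPendingBatches check is the back-pressure that stops parsing from racing ahead of embedding, which is what bounds memory. A simplified, self-contained sketch of that shape; parseFile and uploadBatch are stand-ins, not the scanner's real methods:

```typescript
import pLimit from "p-limit"

declare function parseFile(file: string): Promise<string[]> // stand-in for code parsing
declare function uploadBatch(batch: string[]): Promise<void> // stand-in for embed + upsert

async function indexAll(
	files: string[],
	parsingConcurrency: number,
	batchProcessingConcurrency: number,
	maxPendingBatches: number,
) {
	const parseLimiter = pLimit(parsingConcurrency) // bounds concurrent parsing
	const batchLimiter = pLimit(batchProcessingConcurrency) // bounds concurrent batch work
	const activeBatchPromises = new Set<Promise<void>>()

	const submitBatch = async (batch: string[]) => {
		// Back-pressure: if too many batches are already in flight, wait for one to finish.
		while (activeBatchPromises.size >= maxPendingBatches) {
			await Promise.race(activeBatchPromises)
		}
		const p = batchLimiter(() => uploadBatch(batch)).finally(() => activeBatchPromises.delete(p))
		activeBatchPromises.add(p)
	}

	// Parse files with bounded concurrency; each parsed file yields a batch to upload.
	await Promise.all(files.map((file) => parseLimiter(async () => submitBatch(await parseFile(file)))))
	// Drain whatever is still in flight.
	await Promise.all(activeBatchPromises)
}
```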
diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts
index 6d69e1f0b6c6..b3bbe6130e7f 100644
--- a/src/services/code-index/service-factory.ts
+++ b/src/services/code-index/service-factory.ts
@@ -17,7 +17,16 @@ import { t } from "../../i18n"
 import { TelemetryService } from "@roo-code/telemetry"
 import { TelemetryEventName } from "@roo-code/types"
 import { Package } from "../../shared/package"
-import { BATCH_SEGMENT_THRESHOLD } from "./constants"
+import {
+	BATCH_SEGMENT_THRESHOLD,
+	PARSING_CONCURRENCY,
+	BATCH_PROCESSING_CONCURRENCY,
+	MAX_PENDING_BATCHES,
+	LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD,
+	LOW_RESOURCE_PARSING_CONCURRENCY,
+	LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
+	LOW_RESOURCE_MAX_PENDING_BATCHES,
+} from "./constants"
 
 /**
  * Factory class responsible for creating and configuring code indexing service dependencies.
@@ -158,17 +167,54 @@
 		parser: ICodeParser,
 		ignoreInstance: Ignore,
 	): DirectoryScanner {
-		// Get the configurable batch size from VSCode settings
+		// Get configurable settings from VSCode
 		let batchSize: number
+		let parsingConcurrency: number
+		let batchProcessingConcurrency: number
+		let maxPendingBatches: number
+
 		try {
-			batchSize = vscode.workspace
-				.getConfiguration(Package.name)
-				.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
+			const config = vscode.workspace.getConfiguration(Package.name)
+			const isLowResourceMode = config.get("codeIndex.lowResourceMode", false)
+
+			if (isLowResourceMode) {
+				batchSize = config.get("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
+				parsingConcurrency = config.get(
+					"codeIndex.parsingConcurrency",
+					LOW_RESOURCE_PARSING_CONCURRENCY,
+				)
+				batchProcessingConcurrency = config.get(
+					"codeIndex.batchProcessingConcurrency",
+					LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
+				)
+				maxPendingBatches = config.get("codeIndex.maxPendingBatches", LOW_RESOURCE_MAX_PENDING_BATCHES)
+			} else {
+				batchSize = config.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
+				parsingConcurrency = config.get("codeIndex.parsingConcurrency", PARSING_CONCURRENCY)
+				batchProcessingConcurrency = config.get(
+					"codeIndex.batchProcessingConcurrency",
+					BATCH_PROCESSING_CONCURRENCY,
+				)
+				maxPendingBatches = config.get("codeIndex.maxPendingBatches", MAX_PENDING_BATCHES)
+			}
 		} catch {
 			// In test environment, vscode.workspace might not be available
 			batchSize = BATCH_SEGMENT_THRESHOLD
+			parsingConcurrency = PARSING_CONCURRENCY
+			batchProcessingConcurrency = BATCH_PROCESSING_CONCURRENCY
+			maxPendingBatches = MAX_PENDING_BATCHES
 		}
-		return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance, batchSize)
+		return new DirectoryScanner(
+			embedder,
+			vectorStore,
+			parser,
+			this.cacheManager,
+			ignoreInstance,
+			batchSize,
+			parsingConcurrency,
+			batchProcessingConcurrency,
+			maxPendingBatches,
+		)
 	}
 
 	/**
@@ -182,12 +228,18 @@ export class CodeIndexServiceFactory {
 		ignoreInstance: Ignore,
 		rooIgnoreController?: RooIgnoreController,
 	): IFileWatcher {
-		// Get the configurable batch size from VSCode settings
+		// Get configurable settings from VSCode
 		let batchSize: number
+
 		try {
-			batchSize = vscode.workspace
-				.getConfiguration(Package.name)
-				.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
+			const config = vscode.workspace.getConfiguration(Package.name)
+			const isLowResourceMode = config.get("codeIndex.lowResourceMode", false)
+
+			if (isLowResourceMode) {
+				batchSize = config.get("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
+			} else {
+				batchSize = config.get("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
+			}
 		} catch {
 			// In test environment, vscode.workspace might not be available
 			batchSize = BATCH_SEGMENT_THRESHOLD
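Throughout the factory, as in the constructors earlier, settings are read through getConfiguration(Package.name) with keys like "codeIndex.lowResourceMode". Assuming Package.name resolves to "roo-cline", which the keys contributed in package.json suggest, these reads correspond to the "roo-cline.codeIndex.*" settings declared at the top of this diff. A minimal sketch of that scoping:

```typescript
import * as vscode from "vscode"
import { Package } from "../../shared/package" // path as imported in service-factory.ts above

// getConfiguration(Package.name) scopes reads to this extension's contributed settings,
// so "codeIndex.lowResourceMode" maps to the "roo-cline.codeIndex.lowResourceMode" key
// declared in package.json (assuming Package.name is "roo-cline").
const config = vscode.workspace.getConfiguration(Package.name)
const lowResourceMode = config.get<boolean>("codeIndex.lowResourceMode", false)
```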