Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,39 @@
"minimum": 1,
"maximum": 200,
"description": "%settings.codeIndex.embeddingBatchSize.description%"
},
"roo-cline.codeIndex.parsingConcurrency": {
"type": "number",
"default": 10,
"minimum": 1,
"maximum": 50,
"description": "%settings.codeIndex.parsingConcurrency.description%"
},
"roo-cline.codeIndex.batchProcessingConcurrency": {
"type": "number",
"default": 10,
"minimum": 1,
"maximum": 50,
"description": "%settings.codeIndex.batchProcessingConcurrency.description%"
},
"roo-cline.codeIndex.maxPendingBatches": {
"type": "number",
"default": 20,
"minimum": 1,
"maximum": 100,
"description": "%settings.codeIndex.maxPendingBatches.description%"
},
"roo-cline.codeIndex.lowResourceMode": {
"type": "boolean",
"default": false,
"description": "%settings.codeIndex.lowResourceMode.description%"
},
"roo-cline.codeIndex.fileProcessingConcurrency": {
"type": "number",
"default": 10,
"minimum": 1,
"maximum": 50,
"description": "%settings.codeIndex.fileProcessingConcurrency.description%"
}
}
}
Expand Down
7 changes: 6 additions & 1 deletion src/package.nls.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,10 @@
"settings.useAgentRules.description": "Enable loading of AGENTS.md files for agent-specific rules (see https://agent-rules.org/)",
"settings.apiRequestTimeout.description": "Maximum time in seconds to wait for API responses (0 = no timeout, 1-3600s, default: 600s). Higher values are recommended for local providers like LM Studio and Ollama that may need more processing time.",
"settings.newTaskRequireTodos.description": "Require todos parameter when creating new tasks with the new_task tool",
"settings.codeIndex.embeddingBatchSize.description": "The batch size for embedding operations during code indexing. Adjust this based on your API provider's limits. Default is 60."
"settings.codeIndex.embeddingBatchSize.description": "The batch size for embedding operations during code indexing. Adjust this based on your API provider's limits. Default is 60.",
"settings.codeIndex.parsingConcurrency.description": "Maximum number of files to parse concurrently during code indexing. Lower values reduce memory and CPU usage. Default is 10.",
"settings.codeIndex.batchProcessingConcurrency.description": "Maximum number of embedding batches to process concurrently. Lower values reduce API load and memory usage. Default is 10.",
"settings.codeIndex.maxPendingBatches.description": "Maximum number of pending batches before throttling. Lower values reduce memory usage. Default is 20.",
"settings.codeIndex.lowResourceMode.description": "Enable low resource mode for better performance on modest hardware. This reduces concurrency, batch sizes, and memory usage.",
"settings.codeIndex.fileProcessingConcurrency.description": "Maximum number of files to process concurrently when watching for file changes. Lower values reduce CPU and memory usage. Default is 10."
}
16 changes: 12 additions & 4 deletions src/services/code-index/constants/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,24 @@ export const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB

/**Directory Scanner */
export const MAX_LIST_FILES_LIMIT_CODE_INDEX = 50_000
export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts
export const BATCH_SEGMENT_THRESHOLD = 60 // Default number of code segments to batch for embeddings/upserts
export const MAX_BATCH_RETRIES = 3
export const INITIAL_RETRY_DELAY_MS = 500
export const PARSING_CONCURRENCY = 10
export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate before waiting
export const PARSING_CONCURRENCY = 10 // Default number of files to parse concurrently
export const MAX_PENDING_BATCHES = 20 // Default maximum number of batches to accumulate before waiting
export const BATCH_PROCESSING_CONCURRENCY = 10 // Default number of batches to process concurrently
export const FILE_PROCESSING_CONCURRENCY = 10 // Default number of files to process concurrently in file watcher

/**
 * Low Resource Mode Defaults
 *
 * Reduced limits intended for modest hardware. Callers pass these as the
 * `defaultValue` argument of `config.get(...)` when the
 * `codeIndex.lowResourceMode` setting is enabled.
 *
 * NOTE(review): the matching settings appear to be contributed with explicit
 * defaults in package.json (e.g. `codeIndex.parsingConcurrency` defaults to 10).
 * `WorkspaceConfiguration.get(key, defaultValue)` only falls back to
 * `defaultValue` when no value is found, so these constants may never take
 * effect unless callers check for an explicitly set user value (for example
 * via `config.inspect(key)`) — confirm against the VS Code API behavior.
 */
export const LOW_RESOURCE_PARSING_CONCURRENCY = 2 // Reduced concurrent file parsing for low-end hardware
export const LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY = 2 // Reduced concurrent batch processing
export const LOW_RESOURCE_MAX_PENDING_BATCHES = 5 // Reduced pending batches
export const LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD = 20 // Smaller batch size for low-end hardware
export const LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY = 2 // Reduced file processing concurrency for low-end hardware

/**OpenAI Embedder */
export const MAX_BATCH_TOKENS = 100000
export const MAX_ITEM_TOKENS = 8191
export const BATCH_PROCESSING_CONCURRENCY = 10

/**Gemini Embedder */
export const GEMINI_MAX_ITEM_TOKENS = 2048
43 changes: 30 additions & 13 deletions src/services/code-index/processors/file-watcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import {
BATCH_SEGMENT_THRESHOLD,
MAX_BATCH_RETRIES,
INITIAL_RETRY_DELAY_MS,
LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD,
FILE_PROCESSING_CONCURRENCY,
LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY,
} from "../constants"
import { createHash } from "crypto"
import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController"
Expand Down Expand Up @@ -38,7 +41,7 @@ export class FileWatcher implements IFileWatcher {
private accumulatedEvents: Map<string, { uri: vscode.Uri; type: "create" | "change" | "delete" }> = new Map()
private batchProcessDebounceTimer?: NodeJS.Timeout
private readonly BATCH_DEBOUNCE_DELAY_MS = 500
private readonly FILE_PROCESSING_CONCURRENCY_LIMIT = 10
private readonly FILE_PROCESSING_CONCURRENCY_LIMIT: number
private readonly batchSegmentThreshold: number

private readonly _onDidStartBatchProcessing = new vscode.EventEmitter<string[]>()
Expand Down Expand Up @@ -86,19 +89,33 @@ export class FileWatcher implements IFileWatcher {
if (ignoreInstance) {
this.ignoreInstance = ignoreInstance
}
// Get the configurable batch size from VSCode settings, fallback to default
// If not provided in constructor, try to get from VSCode settings
if (batchSegmentThreshold !== undefined) {
this.batchSegmentThreshold = batchSegmentThreshold
} else {
try {
this.batchSegmentThreshold = vscode.workspace
.getConfiguration(Package.name)
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
} catch {
// In test environment, vscode.workspace might not be available
this.batchSegmentThreshold = BATCH_SEGMENT_THRESHOLD
// Get configurable settings from VSCode
try {
const config = vscode.workspace.getConfiguration(Package.name)
const isLowResourceMode = config.get<boolean>("codeIndex.lowResourceMode", false)

if (isLowResourceMode) {
this.batchSegmentThreshold =
batchSegmentThreshold ??
config.get<number>("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
// Use configurable file processing concurrency with low resource default
this.FILE_PROCESSING_CONCURRENCY_LIMIT = config.get<number>(
"codeIndex.fileProcessingConcurrency",
LOW_RESOURCE_FILE_PROCESSING_CONCURRENCY,
)
} else {
this.batchSegmentThreshold =
batchSegmentThreshold ?? config.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
// Use configurable file processing concurrency with normal default
this.FILE_PROCESSING_CONCURRENCY_LIMIT = config.get<number>(
"codeIndex.fileProcessingConcurrency",
FILE_PROCESSING_CONCURRENCY,
)
}
} catch {
// In test environment, vscode.workspace might not be available
this.batchSegmentThreshold = batchSegmentThreshold ?? BATCH_SEGMENT_THRESHOLD
this.FILE_PROCESSING_CONCURRENCY_LIMIT = FILE_PROCESSING_CONCURRENCY
}
}

Expand Down
68 changes: 52 additions & 16 deletions src/services/code-index/processors/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ import {
PARSING_CONCURRENCY,
BATCH_PROCESSING_CONCURRENCY,
MAX_PENDING_BATCHES,
LOW_RESOURCE_PARSING_CONCURRENCY,
LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
LOW_RESOURCE_MAX_PENDING_BATCHES,
LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD,
} from "../constants"
import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
import { TelemetryService } from "@roo-code/telemetry"
Expand All @@ -33,6 +37,9 @@ import { Package } from "../../../shared/package"

export class DirectoryScanner implements IDirectoryScanner {
private readonly batchSegmentThreshold: number
private readonly parsingConcurrency: number
private readonly batchProcessingConcurrency: number
private readonly maxPendingBatches: number

constructor(
private readonly embedder: IEmbedder,
Expand All @@ -41,20 +48,49 @@ export class DirectoryScanner implements IDirectoryScanner {
private readonly cacheManager: CacheManager,
private readonly ignoreInstance: Ignore,
batchSegmentThreshold?: number,
parsingConcurrency?: number,
batchProcessingConcurrency?: number,
maxPendingBatches?: number,
) {
// Get the configurable batch size from VSCode settings, fallback to default
// If not provided in constructor, try to get from VSCode settings
if (batchSegmentThreshold !== undefined) {
this.batchSegmentThreshold = batchSegmentThreshold
} else {
try {
this.batchSegmentThreshold = vscode.workspace
.getConfiguration(Package.name)
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
} catch {
// In test environment, vscode.workspace might not be available
this.batchSegmentThreshold = BATCH_SEGMENT_THRESHOLD
// Get configuration from VSCode settings
try {
const config = vscode.workspace.getConfiguration(Package.name)
const isLowResourceMode = config.get<boolean>("codeIndex.lowResourceMode", false)

// Apply low resource mode defaults if enabled
if (isLowResourceMode) {
this.batchSegmentThreshold =
batchSegmentThreshold ??
config.get<number>("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
this.parsingConcurrency =
parsingConcurrency ??
config.get<number>("codeIndex.parsingConcurrency", LOW_RESOURCE_PARSING_CONCURRENCY)
this.batchProcessingConcurrency =
batchProcessingConcurrency ??
config.get<number>(
"codeIndex.batchProcessingConcurrency",
LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
)
this.maxPendingBatches =
maxPendingBatches ??
config.get<number>("codeIndex.maxPendingBatches", LOW_RESOURCE_MAX_PENDING_BATCHES)
} else {
this.batchSegmentThreshold =
batchSegmentThreshold ?? config.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
this.parsingConcurrency =
parsingConcurrency ?? config.get<number>("codeIndex.parsingConcurrency", PARSING_CONCURRENCY)
this.batchProcessingConcurrency =
batchProcessingConcurrency ??
config.get<number>("codeIndex.batchProcessingConcurrency", BATCH_PROCESSING_CONCURRENCY)
this.maxPendingBatches =
maxPendingBatches ?? config.get<number>("codeIndex.maxPendingBatches", MAX_PENDING_BATCHES)
}
} catch {
// In test environment, vscode.workspace might not be available
this.batchSegmentThreshold = batchSegmentThreshold ?? BATCH_SEGMENT_THRESHOLD
this.parsingConcurrency = parsingConcurrency ?? PARSING_CONCURRENCY
this.batchProcessingConcurrency = batchProcessingConcurrency ?? BATCH_PROCESSING_CONCURRENCY
this.maxPendingBatches = maxPendingBatches ?? MAX_PENDING_BATCHES
}
}

Expand Down Expand Up @@ -108,9 +144,9 @@ export class DirectoryScanner implements IDirectoryScanner {
let processedCount = 0
let skippedCount = 0

// Initialize parallel processing tools
const parseLimiter = pLimit(PARSING_CONCURRENCY) // Concurrency for file parsing
const batchLimiter = pLimit(BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing
// Initialize parallel processing tools with configurable concurrency
const parseLimiter = pLimit(this.parsingConcurrency) // Concurrency for file parsing
const batchLimiter = pLimit(this.batchProcessingConcurrency) // Concurrency for batch processing
const mutex = new Mutex()

// Shared batch accumulators (protected by mutex)
Expand Down Expand Up @@ -174,7 +210,7 @@ export class DirectoryScanner implements IDirectoryScanner {
// Check if batch threshold is met
if (currentBatchBlocks.length >= this.batchSegmentThreshold) {
// Wait if we've reached the maximum pending batches
while (pendingBatchCount >= MAX_PENDING_BATCHES) {
while (pendingBatchCount >= this.maxPendingBatches) {
// Wait for at least one batch to complete
await Promise.race(activeBatchPromises)
}
Expand Down
72 changes: 62 additions & 10 deletions src/services/code-index/service-factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,16 @@ import { t } from "../../i18n"
import { TelemetryService } from "@roo-code/telemetry"
import { TelemetryEventName } from "@roo-code/types"
import { Package } from "../../shared/package"
import { BATCH_SEGMENT_THRESHOLD } from "./constants"
import {
BATCH_SEGMENT_THRESHOLD,
PARSING_CONCURRENCY,
BATCH_PROCESSING_CONCURRENCY,
MAX_PENDING_BATCHES,
LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD,
LOW_RESOURCE_PARSING_CONCURRENCY,
LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
LOW_RESOURCE_MAX_PENDING_BATCHES,
} from "./constants"

/**
* Factory class responsible for creating and configuring code indexing service dependencies.
Expand Down Expand Up @@ -158,17 +167,54 @@ export class CodeIndexServiceFactory {
parser: ICodeParser,
ignoreInstance: Ignore,
): DirectoryScanner {
// Get the configurable batch size from VSCode settings
// Get configurable settings from VSCode
let batchSize: number
let parsingConcurrency: number
let batchProcessingConcurrency: number
let maxPendingBatches: number

try {
batchSize = vscode.workspace
.getConfiguration(Package.name)
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
const config = vscode.workspace.getConfiguration(Package.name)
const isLowResourceMode = config.get<boolean>("codeIndex.lowResourceMode", false)

if (isLowResourceMode) {
batchSize = config.get<number>("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
parsingConcurrency = config.get<number>(
"codeIndex.parsingConcurrency",
LOW_RESOURCE_PARSING_CONCURRENCY,
)
batchProcessingConcurrency = config.get<number>(
"codeIndex.batchProcessingConcurrency",
LOW_RESOURCE_BATCH_PROCESSING_CONCURRENCY,
)
maxPendingBatches = config.get<number>("codeIndex.maxPendingBatches", LOW_RESOURCE_MAX_PENDING_BATCHES)
} else {
batchSize = config.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
parsingConcurrency = config.get<number>("codeIndex.parsingConcurrency", PARSING_CONCURRENCY)
batchProcessingConcurrency = config.get<number>(
"codeIndex.batchProcessingConcurrency",
BATCH_PROCESSING_CONCURRENCY,
)
maxPendingBatches = config.get<number>("codeIndex.maxPendingBatches", MAX_PENDING_BATCHES)
}
} catch {
// In test environment, vscode.workspace might not be available
batchSize = BATCH_SEGMENT_THRESHOLD
parsingConcurrency = PARSING_CONCURRENCY
batchProcessingConcurrency = BATCH_PROCESSING_CONCURRENCY
maxPendingBatches = MAX_PENDING_BATCHES
}
return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance, batchSize)
return new DirectoryScanner(
embedder,
vectorStore,
parser,
this.cacheManager,
ignoreInstance,
batchSize,
parsingConcurrency,
batchProcessingConcurrency,
maxPendingBatches,
)
}

/**
Expand All @@ -182,12 +228,18 @@ export class CodeIndexServiceFactory {
ignoreInstance: Ignore,
rooIgnoreController?: RooIgnoreController,
): IFileWatcher {
// Get the configurable batch size from VSCode settings
// Get configurable settings from VSCode
let batchSize: number

try {
batchSize = vscode.workspace
.getConfiguration(Package.name)
.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
const config = vscode.workspace.getConfiguration(Package.name)
const isLowResourceMode = config.get<boolean>("codeIndex.lowResourceMode", false)

if (isLowResourceMode) {
batchSize = config.get<number>("codeIndex.embeddingBatchSize", LOW_RESOURCE_BATCH_SEGMENT_THRESHOLD)
} else {
batchSize = config.get<number>("codeIndex.embeddingBatchSize", BATCH_SEGMENT_THRESHOLD)
}
} catch {
// In test environment, vscode.workspace might not be available
batchSize = BATCH_SEGMENT_THRESHOLD
Expand Down
Loading