diff --git a/src/services/code-index/constants/index.ts b/src/services/code-index/constants/index.ts
index 6f0e0fe7e6..17b02f7dad 100644
--- a/src/services/code-index/constants/index.ts
+++ b/src/services/code-index/constants/index.ts
@@ -16,16 +16,16 @@ export const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB
 
 /**Directory Scanner */
 export const MAX_LIST_FILES_LIMIT_CODE_INDEX = 50_000
-export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts
+export const BATCH_SEGMENT_THRESHOLD = 200 // Number of code segments to batch for embeddings/upserts - increased from 60 for better performance
 export const MAX_BATCH_RETRIES = 3
 export const INITIAL_RETRY_DELAY_MS = 500
-export const PARSING_CONCURRENCY = 10
-export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate before waiting
+export const PARSING_CONCURRENCY = 20 // Increased from 10 for faster parallel file parsing
+export const MAX_PENDING_BATCHES = 30 // Maximum number of batches to accumulate before waiting - increased from 20
 
 /**OpenAI Embedder */
 export const MAX_BATCH_TOKENS = 100000
 export const MAX_ITEM_TOKENS = 8191
-export const BATCH_PROCESSING_CONCURRENCY = 10
+export const BATCH_PROCESSING_CONCURRENCY = 15 // Increased from 10 for better throughput
 
 /**Gemini Embedder */
 export const GEMINI_MAX_ITEM_TOKENS = 2048
diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts
index fbc4a24118..df9883d1c1 100644
--- a/src/services/code-index/orchestrator.ts
+++ b/src/services/code-index/orchestrator.ts
@@ -139,11 +139,22 @@ export class CodeIndexOrchestrator {
+			// Report block progress and mirror it into the status message as a percentage
+			const reportIndexingProgress = () => {
+				this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+
+				// No percentage until at least one block has been found (avoids dividing by zero)
+				if (cumulativeBlocksFoundSoFar > 0) {
+					const progressPercent = Math.round((cumulativeBlocksIndexed / cumulativeBlocksFoundSoFar) * 100)
+					this.stateManager.setSystemState("Indexing", `Indexing workspace... (${progressPercent}% complete)`)
+				}
+			}
+
 			const handleFileParsed = (fileBlockCount: number) => {
 				cumulativeBlocksFoundSoFar += fileBlockCount
-				this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+				reportIndexingProgress()
 			}
 
 			const handleBlocksIndexed = (indexedCount: number) => {
 				cumulativeBlocksIndexed += indexedCount
-				this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+				reportIndexingProgress()
 			}
 
 			const result = await this.scanner.scanDirectory(
diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts
index 27362b8b74..362fc86da6 100644
--- a/src/services/code-index/processors/scanner.ts
+++ b/src/services/code-index/processors/scanner.ts
@@ -89,6 +89,55 @@ export class DirectoryScanner implements IDirectoryScanner {
 		let processedCount = 0
 		let skippedCount = 0
 
+		// Early termination check: if all files are already indexed, skip processing
+		// NOTE(review): the early return below bypasses everything later in this method, including any
+		// handling of files deleted since the last scan — confirm that is acceptable.
+		const areFilesUnchanged = async (filePaths: string[]): Promise<boolean> => {
+			for (const filePath of filePaths) {
+				try {
+					const stats = await stat(filePath)
+					if (stats.size > MAX_FILE_SIZE_BYTES) {
+						// Oversized files are not hashed here and never count as changed
+						continue
+					}
+					const buffer = await vscode.workspace.fs.readFile(vscode.Uri.file(filePath))
+					const content = Buffer.from(buffer).toString("utf-8")
+					const currentFileHash = createHash("sha256").update(content).digest("hex")
+					if (this.cacheManager.getHash(filePath) !== currentFileHash) {
+						return false
+					}
+				} catch (error) {
+					// If we can't check a file, assume it might have changed
+					return false
+				}
+			}
+			return true
+		}
+
+		// Check first 10 files for quick assessment
+		const quickCheckLimit = Math.min(10, supportedPaths.length)
+		let allFilesUnchanged = await areFilesUnchanged(supportedPaths.slice(0, quickCheckLimit))
+
+		// If the quick check passed and more files remain, do a full check to confirm
+		if (allFilesUnchanged && supportedPaths.length > quickCheckLimit) {
+			console.log(`[DirectoryScanner] Quick check passed, verifying all ${supportedPaths.length} files...`)
+			allFilesUnchanged = await areFilesUnchanged(supportedPaths.slice(quickCheckLimit))
+		}
+
+		// If all files are unchanged, we can skip the entire indexing process
+		if (allFilesUnchanged && supportedPaths.length > 0) {
+			console.log(
+				`[DirectoryScanner] All ${supportedPaths.length} files are already indexed and unchanged. Skipping indexing.`,
+			)
+			return {
+				stats: {
+					processed: 0,
+					skipped: supportedPaths.length,
+				},
+				totalBlockCount: 0,
+			}
+		}
+
 		// Initialize parallel processing tools
 		const parseLimiter = pLimit(PARSING_CONCURRENCY) // Concurrency for file parsing
 		const batchLimiter = pLimit(BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing