Skip to content

Commit d553512

Browse files
committed
perf: optimize codebase indexing performance
- Increase BATCH_SEGMENT_THRESHOLD from 60 to 200 for better batching efficiency
- Increase PARSING_CONCURRENCY from 10 to 20 for faster parallel file parsing
- Increase BATCH_PROCESSING_CONCURRENCY from 10 to 15 for improved throughput
- Increase MAX_PENDING_BATCHES from 20 to 30 to allow more parallel processing
- Add early termination check to skip indexing when all files are unchanged
- Add progress percentage to indexing status messages for better user feedback

These changes significantly improve indexing performance by:
1. Processing larger batches to reduce API overhead
2. Increasing parallelization for CPU-bound operations
3. Skipping unnecessary work when files are already indexed
4. Providing better progress feedback to users

Fixes #7350
1 parent 8e4c0ae commit d553512

File tree

3 files changed

+84
-4
lines changed

3 files changed

+84
-4
lines changed

src/services/code-index/constants/index.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@ export const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB
1616

1717
/**Directory Scanner */
1818
export const MAX_LIST_FILES_LIMIT_CODE_INDEX = 50_000
19-
export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts
19+
export const BATCH_SEGMENT_THRESHOLD = 200 // Number of code segments to batch for embeddings/upserts - increased from 60 for better performance
2020
export const MAX_BATCH_RETRIES = 3
2121
export const INITIAL_RETRY_DELAY_MS = 500
22-
export const PARSING_CONCURRENCY = 10
23-
export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate before waiting
22+
export const PARSING_CONCURRENCY = 20 // Increased from 10 for faster parallel file parsing
23+
export const MAX_PENDING_BATCHES = 30 // Maximum number of batches to accumulate before waiting - increased from 20
2424

2525
/**OpenAI Embedder */
2626
export const MAX_BATCH_TOKENS = 100000
2727
export const MAX_ITEM_TOKENS = 8191
28-
export const BATCH_PROCESSING_CONCURRENCY = 10
28+
export const BATCH_PROCESSING_CONCURRENCY = 15 // Increased from 10 for better throughput
2929

3030
/**Gemini Embedder */
3131
export const GEMINI_MAX_ITEM_TOKENS = 2048

src/services/code-index/orchestrator.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,23 @@ export class CodeIndexOrchestrator {
139139
const handleFileParsed = (fileBlockCount: number) => {
140140
cumulativeBlocksFoundSoFar += fileBlockCount
141141
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
142+
143+
// Add progress percentage to status message
144+
if (cumulativeBlocksFoundSoFar > 0) {
145+
const progressPercent = Math.round((cumulativeBlocksIndexed / cumulativeBlocksFoundSoFar) * 100)
146+
this.stateManager.setSystemState("Indexing", `Indexing workspace... (${progressPercent}% complete)`)
147+
}
142148
}
143149

144150
const handleBlocksIndexed = (indexedCount: number) => {
145151
cumulativeBlocksIndexed += indexedCount
146152
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
153+
154+
// Add progress percentage to status message
155+
if (cumulativeBlocksFoundSoFar > 0) {
156+
const progressPercent = Math.round((cumulativeBlocksIndexed / cumulativeBlocksFoundSoFar) * 100)
157+
this.stateManager.setSystemState("Indexing", `Indexing workspace... (${progressPercent}% complete)`)
158+
}
147159
}
148160

149161
const result = await this.scanner.scanDirectory(

src/services/code-index/processors/scanner.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,74 @@ export class DirectoryScanner implements IDirectoryScanner {
8989
let processedCount = 0
9090
let skippedCount = 0
9191

92+
// Early termination check: if all files are already indexed, skip processing
93+
let allFilesUnchanged = true
94+
let quickCheckCount = 0
95+
const quickCheckLimit = Math.min(10, supportedPaths.length) // Check first 10 files for quick assessment
96+
97+
for (const filePath of supportedPaths.slice(0, quickCheckLimit)) {
98+
try {
99+
const stats = await stat(filePath)
100+
if (stats.size <= MAX_FILE_SIZE_BYTES) {
101+
const content = await vscode.workspace.fs
102+
.readFile(vscode.Uri.file(filePath))
103+
.then((buffer) => Buffer.from(buffer).toString("utf-8"))
104+
const currentFileHash = createHash("sha256").update(content).digest("hex")
105+
const cachedFileHash = this.cacheManager.getHash(filePath)
106+
107+
if (cachedFileHash !== currentFileHash) {
108+
allFilesUnchanged = false
109+
break
110+
}
111+
}
112+
quickCheckCount++
113+
} catch (error) {
114+
// If we can't check a file, assume it might have changed
115+
allFilesUnchanged = false
116+
break
117+
}
118+
}
119+
120+
// If quick check shows all sampled files are unchanged and we checked a reasonable sample,
121+
// do a full check to confirm
122+
if (allFilesUnchanged && quickCheckCount === quickCheckLimit && supportedPaths.length > quickCheckLimit) {
123+
console.log(`[DirectoryScanner] Quick check passed, verifying all ${supportedPaths.length} files...`)
124+
for (const filePath of supportedPaths.slice(quickCheckLimit)) {
125+
try {
126+
const stats = await stat(filePath)
127+
if (stats.size <= MAX_FILE_SIZE_BYTES) {
128+
const content = await vscode.workspace.fs
129+
.readFile(vscode.Uri.file(filePath))
130+
.then((buffer) => Buffer.from(buffer).toString("utf-8"))
131+
const currentFileHash = createHash("sha256").update(content).digest("hex")
132+
const cachedFileHash = this.cacheManager.getHash(filePath)
133+
134+
if (cachedFileHash !== currentFileHash) {
135+
allFilesUnchanged = false
136+
break
137+
}
138+
}
139+
} catch (error) {
140+
allFilesUnchanged = false
141+
break
142+
}
143+
}
144+
}
145+
146+
// If all files are unchanged, we can skip the entire indexing process
147+
if (allFilesUnchanged && supportedPaths.length > 0) {
148+
console.log(
149+
`[DirectoryScanner] All ${supportedPaths.length} files are already indexed and unchanged. Skipping indexing.`,
150+
)
151+
return {
152+
stats: {
153+
processed: 0,
154+
skipped: supportedPaths.length,
155+
},
156+
totalBlockCount: 0,
157+
}
158+
}
159+
92160
// Initialize parallel processing tools
93161
const parseLimiter = pLimit(PARSING_CONCURRENCY) // Concurrency for file parsing
94162
const batchLimiter = pLimit(BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing

0 commit comments

Comments
 (0)