Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions src/core/webview/webviewMessageHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2663,18 +2663,26 @@ export const webviewMessageHandler = async (
return
}
if (manager.isFeatureEnabled && manager.isFeatureConfigured) {
if (!manager.isInitialized) {
await manager.initialize(provider.contextProxy)
}

// startIndexing now handles error recovery internally
manager.startIndexing()

// If startIndexing recovered from error, we need to reinitialize
if (!manager.isInitialized) {
await manager.initialize(provider.contextProxy)
// Try starting again after initialization
// Mimic extension startup behavior: initialize first, which will
// check if Qdrant container is active and reuse existing collection
await manager.initialize(provider.contextProxy)

// Only call startIndexing if we're in a state that requires it
// (e.g., Standby or Error). If already Indexed or Indexing, the
// initialize() call above will have already started the watcher.
const currentState = manager.state
if (currentState === "Standby" || currentState === "Error") {
// startIndexing now handles error recovery internally
manager.startIndexing()

// If startIndexing recovered from error, we need to reinitialize
if (!manager.isInitialized) {
await manager.initialize(provider.contextProxy)
// Try starting again after initialization
if (manager.state === "Standby" || manager.state === "Error") {
manager.startIndexing()
}
}
}
}
} catch (error) {
Expand Down
18 changes: 18 additions & 0 deletions src/services/code-index/interfaces/vector-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,24 @@ export interface IVectorStore {
* @returns Promise resolving to boolean indicating if the collection exists
*/
collectionExists(): Promise<boolean>

/**
* Checks if the collection exists and has indexed points
* @returns Promise resolving to boolean indicating if the collection exists and has points
*/
hasIndexedData(): Promise<boolean>

/**
* Marks the indexing process as complete by storing metadata
* Should be called after a successful full workspace scan or incremental scan
*/
markIndexingComplete(): Promise<void>

/**
* Marks the indexing process as incomplete by storing metadata
* Should be called at the start of indexing to indicate work in progress
*/
markIndexingIncomplete(): Promise<void>
}

export interface VectorStoreSearchResult {
Expand Down
209 changes: 150 additions & 59 deletions src/services/code-index/orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,86 +123,164 @@ export class CodeIndexOrchestrator {
this._isProcessing = true
this.stateManager.setSystemState("Indexing", "Initializing services...")

// Track whether we successfully connected to Qdrant and started indexing
// This helps us decide whether to preserve cache on error
let indexingStarted = false

try {
const collectionCreated = await this.vectorStore.initialize()

// Successfully connected to Qdrant
indexingStarted = true

if (collectionCreated) {
await this.cacheManager.clearCacheFile()
}

this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")
// Check if the collection already has indexed data.
// If it does, we can skip the full scan and instead run an incremental scan
// (unchanged files are skipped via the cache) before starting the watcher.
const hasExistingData = await this.vectorStore.hasIndexedData()

let cumulativeBlocksIndexed = 0
let cumulativeBlocksFoundSoFar = 0
let batchErrors: Error[] = []
if (hasExistingData && !collectionCreated) {
// Collection exists with data - run incremental scan to catch any new/changed files
// This handles files added while workspace was closed or Qdrant was inactive
console.log(
"[CodeIndexOrchestrator] Collection already has indexed data. Running incremental scan for new/changed files...",
)
this.stateManager.setSystemState("Indexing", "Checking for new or modified files...")

const handleFileParsed = (fileBlockCount: number) => {
cumulativeBlocksFoundSoFar += fileBlockCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}
// Mark as incomplete at the start of incremental scan
await this.vectorStore.markIndexingIncomplete()

const handleBlocksIndexed = (indexedCount: number) => {
cumulativeBlocksIndexed += indexedCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}
let cumulativeBlocksIndexed = 0
let cumulativeBlocksFoundSoFar = 0
let batchErrors: Error[] = []

const result = await this.scanner.scanDirectory(
this.workspacePath,
(batchError: Error) => {
console.error(
`[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
batchError,
)
batchErrors.push(batchError)
},
handleBlocksIndexed,
handleFileParsed,
)
const handleFileParsed = (fileBlockCount: number) => {
cumulativeBlocksFoundSoFar += fileBlockCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}

if (!result) {
throw new Error("Scan failed, is scanner initialized?")
}
const handleBlocksIndexed = (indexedCount: number) => {
cumulativeBlocksIndexed += indexedCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}

const { stats } = result
// Run incremental scan - scanner will skip unchanged files using cache
const result = await this.scanner.scanDirectory(
this.workspacePath,
(batchError: Error) => {
console.error(
`[CodeIndexOrchestrator] Error during incremental scan batch: ${batchError.message}`,
batchError,
)
batchErrors.push(batchError)
},
handleBlocksIndexed,
handleFileParsed,
)

// Check if any blocks were actually indexed successfully
// If no blocks were indexed but blocks were found, it means all batches failed
if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
if (batchErrors.length > 0) {
// Use the first batch error as it's likely representative of the main issue
const firstError = batchErrors[0]
throw new Error(`Indexing failed: ${firstError.message}`)
if (!result) {
throw new Error("Incremental scan failed, is scanner initialized?")
}

// If new files were found and indexed, log the results
if (cumulativeBlocksFoundSoFar > 0) {
console.log(
`[CodeIndexOrchestrator] Incremental scan completed: ${cumulativeBlocksIndexed} blocks indexed from new/changed files`,
)
} else {
throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
console.log("[CodeIndexOrchestrator] No new or changed files found")
}

await this._startWatcher()

// Mark indexing as complete after successful incremental scan
await this.vectorStore.markIndexingComplete()

this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
} else {
// No existing data or collection was just created - do a full scan
this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")

// Mark as incomplete at the start of full scan
await this.vectorStore.markIndexingIncomplete()

let cumulativeBlocksIndexed = 0
let cumulativeBlocksFoundSoFar = 0
let batchErrors: Error[] = []

const handleFileParsed = (fileBlockCount: number) => {
cumulativeBlocksFoundSoFar += fileBlockCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}

const handleBlocksIndexed = (indexedCount: number) => {
cumulativeBlocksIndexed += indexedCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}
}

// Check for partial failures - if a significant portion of blocks failed
const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
if (batchErrors.length > 0 && failureRate > 0.1) {
// More than 10% of blocks failed to index
const firstError = batchErrors[0]
throw new Error(
`Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
const result = await this.scanner.scanDirectory(
this.workspacePath,
(batchError: Error) => {
console.error(
`[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
batchError,
)
batchErrors.push(batchError)
},
handleBlocksIndexed,
handleFileParsed,
)
}

// CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
// this is a complete failure regardless of the failure rate calculation
if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
const firstError = batchErrors[0]
throw new Error(`Indexing failed completely: ${firstError.message}`)
}
if (!result) {
throw new Error("Scan failed, is scanner initialized?")
}

// Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
// this is still a failure
if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
}
const { stats } = result

// Check if any blocks were actually indexed successfully
// If no blocks were indexed but blocks were found, it means all batches failed
if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
if (batchErrors.length > 0) {
// Use the first batch error as it's likely representative of the main issue
const firstError = batchErrors[0]
throw new Error(`Indexing failed: ${firstError.message}`)
} else {
throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
}
}

// Check for partial failures - if a significant portion of blocks failed
const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
if (batchErrors.length > 0 && failureRate > 0.1) {
// More than 10% of blocks failed to index
const firstError = batchErrors[0]
throw new Error(
`Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
)
}

await this._startWatcher()
// CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
// this is a complete failure regardless of the failure rate calculation
if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
const firstError = batchErrors[0]
throw new Error(`Indexing failed completely: ${firstError.message}`)
}

// Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
// this is still a failure
if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
}

await this._startWatcher()

// Mark indexing as complete after successful full scan
await this.vectorStore.markIndexingComplete()

this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
}
} catch (error: any) {
console.error("[CodeIndexOrchestrator] Error during indexing:", error)
TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
Expand All @@ -221,7 +299,20 @@ export class CodeIndexOrchestrator {
})
}

await this.cacheManager.clearCacheFile()
// Only clear cache if indexing had started (Qdrant connection succeeded)
// If we never connected to Qdrant, preserve cache for incremental scan when it comes back
if (indexingStarted) {
// Indexing started but failed mid-way - clear cache to avoid cache-Qdrant mismatch
await this.cacheManager.clearCacheFile()
console.log(
"[CodeIndexOrchestrator] Indexing failed after starting. Clearing cache to avoid inconsistency.",
)
} else {
// Never connected to Qdrant - preserve cache for future incremental scan
console.log(
"[CodeIndexOrchestrator] Failed to connect to Qdrant. Preserving cache for future incremental scan.",
)
}

this.stateManager.setSystemState(
"Error",
Expand Down
Loading