Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions src/core/webview/webviewMessageHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2663,18 +2663,26 @@ export const webviewMessageHandler = async (
return
}
if (manager.isFeatureEnabled && manager.isFeatureConfigured) {
if (!manager.isInitialized) {
await manager.initialize(provider.contextProxy)
}

// startIndexing now handles error recovery internally
manager.startIndexing()

// If startIndexing recovered from error, we need to reinitialize
if (!manager.isInitialized) {
await manager.initialize(provider.contextProxy)
// Try starting again after initialization
// Mimic extension startup behavior: initialize first, which will
// check if Qdrant container is active and reuse existing collection
await manager.initialize(provider.contextProxy)

// Only call startIndexing if we're in a state that requires it
// (e.g., Standby or Error). If already Indexed or Indexing, the
// initialize() call above will have already started the watcher.
const currentState = manager.state
if (currentState === "Standby" || currentState === "Error") {
// startIndexing now handles error recovery internally
manager.startIndexing()

// If startIndexing recovered from error, we need to reinitialize
if (!manager.isInitialized) {
await manager.initialize(provider.contextProxy)
// Try starting again after initialization
if (manager.state === "Standby" || manager.state === "Error") {
manager.startIndexing()
}
}
}
}
} catch (error) {
Expand Down
12 changes: 12 additions & 0 deletions src/services/code-index/interfaces/vector-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ export interface IVectorStore {
* @returns Promise resolving to boolean indicating if the collection exists
*/
collectionExists(): Promise<boolean>

/**
 * Checks if the collection exists and has indexed points.
 *
 * The orchestrator uses the result to choose between an incremental scan
 * (existing data) and a full workspace scan (no data or a freshly created
 * collection).
 *
 * NOTE(review): the Qdrant implementation only reports true when the
 * completion marker written by markIndexingComplete() is present, and it
 * resolves to false instead of rejecting when the lookup fails. Confirm that
 * other implementations follow the same contract.
 *
 * @returns Promise resolving to boolean indicating if the collection exists and has points
 */
hasIndexedData(): Promise<boolean>

/**
 * Marks the indexing process as complete by storing metadata.
 *
 * Should be called after a successful full workspace scan, so that a later
 * hasIndexedData() check can tell a finished index apart from a partial one.
 *
 * NOTE(review): the Qdrant implementation rethrows storage failures, so
 * callers should expect the returned promise to reject on error.
 *
 * @returns Promise that resolves once the completion metadata has been stored
 */
markIndexingComplete(): Promise<void>
}

export interface VectorStoreSearchResult {
Expand Down
200 changes: 141 additions & 59 deletions src/services/code-index/orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,86 +123,155 @@ export class CodeIndexOrchestrator {
this._isProcessing = true
this.stateManager.setSystemState("Indexing", "Initializing services...")

// Track whether we successfully connected to Qdrant and started indexing
// This helps us decide whether to preserve cache on error
let indexingStarted = false

try {
const collectionCreated = await this.vectorStore.initialize()

// Successfully connected to Qdrant
indexingStarted = true

if (collectionCreated) {
await this.cacheManager.clearCacheFile()
}

this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")
// Check if the collection already has indexed data
// If it does, we can skip the full scan and just start the watcher
const hasExistingData = await this.vectorStore.hasIndexedData()

let cumulativeBlocksIndexed = 0
let cumulativeBlocksFoundSoFar = 0
let batchErrors: Error[] = []
if (hasExistingData && !collectionCreated) {
// Collection exists with data - run incremental scan to catch any new/changed files
// This handles files added while workspace was closed or Qdrant was inactive
console.log(
"[CodeIndexOrchestrator] Collection already has indexed data. Running incremental scan for new/changed files...",
)
this.stateManager.setSystemState("Indexing", "Checking for new or modified files...")

const handleFileParsed = (fileBlockCount: number) => {
cumulativeBlocksFoundSoFar += fileBlockCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}
let cumulativeBlocksIndexed = 0
let cumulativeBlocksFoundSoFar = 0
let batchErrors: Error[] = []

const handleBlocksIndexed = (indexedCount: number) => {
cumulativeBlocksIndexed += indexedCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}
const handleFileParsed = (fileBlockCount: number) => {
cumulativeBlocksFoundSoFar += fileBlockCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}

const result = await this.scanner.scanDirectory(
this.workspacePath,
(batchError: Error) => {
console.error(
`[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
batchError,
)
batchErrors.push(batchError)
},
handleBlocksIndexed,
handleFileParsed,
)
const handleBlocksIndexed = (indexedCount: number) => {
cumulativeBlocksIndexed += indexedCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}

if (!result) {
throw new Error("Scan failed, is scanner initialized?")
}
// Run incremental scan - scanner will skip unchanged files using cache
const result = await this.scanner.scanDirectory(
this.workspacePath,
(batchError: Error) => {
console.error(
`[CodeIndexOrchestrator] Error during incremental scan batch: ${batchError.message}`,
batchError,
)
batchErrors.push(batchError)
},
handleBlocksIndexed,
handleFileParsed,
)

const { stats } = result
if (!result) {
throw new Error("Incremental scan failed, is scanner initialized?")
}

// Check if any blocks were actually indexed successfully
// If no blocks were indexed but blocks were found, it means all batches failed
if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
if (batchErrors.length > 0) {
// Use the first batch error as it's likely representative of the main issue
const firstError = batchErrors[0]
throw new Error(`Indexing failed: ${firstError.message}`)
// If new files were found and indexed, log the results
if (cumulativeBlocksFoundSoFar > 0) {
console.log(
`[CodeIndexOrchestrator] Incremental scan completed: ${cumulativeBlocksIndexed} blocks indexed from new/changed files`,
)
} else {
throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
console.log("[CodeIndexOrchestrator] No new or changed files found")
}
}

// Check for partial failures - if a significant portion of blocks failed
const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
if (batchErrors.length > 0 && failureRate > 0.1) {
// More than 10% of blocks failed to index
const firstError = batchErrors[0]
throw new Error(
`Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
await this._startWatcher()

this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
} else {
// No existing data or collection was just created - do a full scan
this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")

let cumulativeBlocksIndexed = 0
let cumulativeBlocksFoundSoFar = 0
let batchErrors: Error[] = []

const handleFileParsed = (fileBlockCount: number) => {
cumulativeBlocksFoundSoFar += fileBlockCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}

const handleBlocksIndexed = (indexedCount: number) => {
cumulativeBlocksIndexed += indexedCount
this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
}

const result = await this.scanner.scanDirectory(
this.workspacePath,
(batchError: Error) => {
console.error(
`[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
batchError,
)
batchErrors.push(batchError)
},
handleBlocksIndexed,
handleFileParsed,
)
}

// CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
// this is a complete failure regardless of the failure rate calculation
if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
const firstError = batchErrors[0]
throw new Error(`Indexing failed completely: ${firstError.message}`)
}
if (!result) {
throw new Error("Scan failed, is scanner initialized?")
}

// Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
// this is still a failure
if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
}
const { stats } = result

// Check if any blocks were actually indexed successfully
// If no blocks were indexed but blocks were found, it means all batches failed
if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
if (batchErrors.length > 0) {
// Use the first batch error as it's likely representative of the main issue
const firstError = batchErrors[0]
throw new Error(`Indexing failed: ${firstError.message}`)
} else {
throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
}
}

await this._startWatcher()
// Check for partial failures - if a significant portion of blocks failed
const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
if (batchErrors.length > 0 && failureRate > 0.1) {
// More than 10% of blocks failed to index
const firstError = batchErrors[0]
throw new Error(
`Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
)
}

this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
// CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
// this is a complete failure regardless of the failure rate calculation
if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
const firstError = batchErrors[0]
throw new Error(`Indexing failed completely: ${firstError.message}`)
}

// Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
// this is still a failure
if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
}

await this._startWatcher()

// Mark indexing as complete after successful full scan
await this.vectorStore.markIndexingComplete()

this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
}
} catch (error: any) {
console.error("[CodeIndexOrchestrator] Error during indexing:", error)
TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
Expand All @@ -221,7 +290,20 @@ export class CodeIndexOrchestrator {
})
}

await this.cacheManager.clearCacheFile()
// Only clear cache if indexing had started (Qdrant connection succeeded)
// If we never connected to Qdrant, preserve cache for incremental scan when it comes back
if (indexingStarted) {
// Indexing started but failed mid-way - clear cache to avoid cache-Qdrant mismatch
await this.cacheManager.clearCacheFile()
console.log(
"[CodeIndexOrchestrator] Indexing failed after starting. Clearing cache to avoid inconsistency.",
)
} else {
// Never connected to Qdrant - preserve cache for future incremental scan
console.log(
"[CodeIndexOrchestrator] Failed to connect to Qdrant. Preserving cache for future incremental scan.",
)
}

this.stateManager.setSystemState(
"Error",
Expand Down
66 changes: 65 additions & 1 deletion src/services/code-index/vector-store/qdrant-client.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import { QdrantClient, Schemas } from "@qdrant/js-client-rest"
import { createHash } from "crypto"
import * as path from "path"
import { v5 as uuidv5 } from "uuid"
import { getWorkspacePath } from "../../../utils/path"
import { IVectorStore } from "../interfaces/vector-store"
import { Payload, VectorStoreSearchResult } from "../interfaces"
import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../constants"
import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE, QDRANT_CODE_BLOCK_NAMESPACE } from "../constants/"
import { t } from "../../../i18n"

/**
Expand Down Expand Up @@ -548,4 +549,67 @@ export class QdrantVectorStore implements IVectorStore {
const collectionInfo = await this.getCollectionInfo()
return collectionInfo !== null
}

/**
 * Reports whether this collection holds a finished index.
 *
 * The answer is true only when the collection can be described, it contains
 * at least one point, and the deterministic metadata point carries
 * `indexing_complete === true`. Any failure while querying is logged as a
 * warning and reported as false; this method never rejects.
 *
 * @returns Promise resolving to true when a completed index is present, otherwise false
 */
async hasIndexedData(): Promise<boolean> {
	try {
		const info = await this.getCollectionInfo()

		// A missing collection and an empty collection are treated the same: nothing indexed.
		const storedPoints = info?.points_count ?? 0
		if (storedPoints === 0) {
			return false
		}

		// The marker id is derived from a constant string, so every lookup targets the same point.
		const markerId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
		const markerRecords = await this.client.retrieve(this.collectionName, {
			ids: [markerId],
		})

		// No marker means the collection was only partially indexed.
		if (markerRecords.length === 0) {
			return false
		}
		return markerRecords[0].payload?.indexing_complete === true
	} catch (error) {
		console.warn("[QdrantVectorStore] Failed to check if collection has data:", error)
		return false
	}
}

/**
 * Records that a full workspace scan finished successfully.
 *
 * Upserts a single metadata point under a deterministic id, with an
 * all-zero vector of length `vectorSize` and a payload flagging completion,
 * and waits for the write to be applied. Failures are logged and rethrown.
 *
 * @returns Promise that resolves once the marker point has been written
 * @throws Rethrows whatever error the upsert (or marker construction) raised
 */
async markIndexingComplete(): Promise<void> {
	try {
		// Same constant-derived id on every call, so repeated calls overwrite one point.
		const markerId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)

		const completionMarker = {
			id: markerId,
			// Zero-filled vector of length vectorSize; the point exists for its payload.
			vector: new Array(this.vectorSize).fill(0),
			payload: {
				type: "metadata",
				indexing_complete: true,
				completed_at: Date.now(),
			},
		}

		await this.client.upsert(this.collectionName, {
			points: [completionMarker],
			wait: true,
		})
		console.log("[QdrantVectorStore] Marked indexing as complete")
	} catch (error) {
		console.error("[QdrantVectorStore] Failed to mark indexing as complete:", error)
		throw error
	}
}
}