diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts
index af5f9925c353..57cd829cce28 100644
--- a/src/core/webview/webviewMessageHandler.ts
+++ b/src/core/webview/webviewMessageHandler.ts
@@ -2663,18 +2663,26 @@ export const webviewMessageHandler = async (
                 return
             }
             if (manager.isFeatureEnabled && manager.isFeatureConfigured) {
-                if (!manager.isInitialized) {
-                    await manager.initialize(provider.contextProxy)
-                }
-
-                // startIndexing now handles error recovery internally
-                manager.startIndexing()
-
-                // If startIndexing recovered from error, we need to reinitialize
-                if (!manager.isInitialized) {
-                    await manager.initialize(provider.contextProxy)
-                    // Try starting again after initialization
+                // Mimic extension startup behavior: initialize first, which will
+                // check if Qdrant container is active and reuse existing collection
+                await manager.initialize(provider.contextProxy)
+
+                // Only call startIndexing if we're in a state that requires it
+                // (e.g., Standby or Error). If already Indexed or Indexing, the
+                // initialize() call above will have already started the watcher.
+                const currentState = manager.state
+                if (currentState === "Standby" || currentState === "Error") {
+                    // startIndexing now handles error recovery internally
                     manager.startIndexing()
+
+                    // If startIndexing recovered from error, we need to reinitialize
+                    if (!manager.isInitialized) {
+                        await manager.initialize(provider.contextProxy)
+                        // Try starting again after initialization
+                        if (manager.state === "Standby" || manager.state === "Error") {
+                            manager.startIndexing()
+                        }
+                    }
                 }
             }
         } catch (error) {
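Reviewer note: the webview handler now mirrors extension startup by initializing first and only starting indexing from terminal states. A minimal sketch of that gating, written against a hypothetical `IndexManagerLike` shape rather than the real `CodeIndexManager` (names assumed for illustration):

```ts
// Sketch only: a stand-in for the manager's public surface, not the real class.
type IndexState = "Standby" | "Indexing" | "Indexed" | "Error"

interface IndexManagerLike {
    state: IndexState
    isInitialized: boolean
    initialize(): Promise<void>
    startIndexing(): void
}

// Mirrors the handler's intent: initialize first, then start indexing only from
// Standby/Error, retrying once if startIndexing's error recovery reset the manager.
async function ensureIndexing(manager: IndexManagerLike): Promise<void> {
    await manager.initialize()

    if (manager.state === "Standby" || manager.state === "Error") {
        manager.startIndexing()

        if (!manager.isInitialized) {
            await manager.initialize()
            if (manager.state === "Standby" || manager.state === "Error") {
                manager.startIndexing()
            }
        }
    }
}
```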
diff --git a/src/services/code-index/__tests__/orchestrator.spec.ts b/src/services/code-index/__tests__/orchestrator.spec.ts
new file mode 100644
index 000000000000..aab1ef888d3d
--- /dev/null
+++ b/src/services/code-index/__tests__/orchestrator.spec.ts
@@ -0,0 +1,160 @@
+import { describe, it, expect, beforeEach, vi } from "vitest"
+import { CodeIndexOrchestrator } from "../orchestrator"
+
+// Mock vscode workspace so startIndexing passes workspace check
+vi.mock("vscode", () => {
+    const path = require("path")
+    const testWorkspacePath = path.join(path.sep, "test", "workspace")
+    return {
+        window: {
+            activeTextEditor: null,
+        },
+        workspace: {
+            workspaceFolders: [
+                {
+                    uri: { fsPath: testWorkspacePath },
+                    name: "test",
+                    index: 0,
+                },
+            ],
+            createFileSystemWatcher: vi.fn().mockReturnValue({
+                onDidCreate: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+                onDidChange: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+                onDidDelete: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+                dispose: vi.fn(),
+            }),
+        },
+        RelativePattern: vi.fn().mockImplementation((base: string, pattern: string) => ({ base, pattern })),
+    }
+})
+
+// Mock TelemetryService
+vi.mock("@roo-code/telemetry", () => ({
+    TelemetryService: {
+        instance: {
+            captureEvent: vi.fn(),
+        },
+    },
+}))
+
+// Mock i18n translator used in orchestrator messages
+vi.mock("../../i18n", () => ({
+    t: (key: string, params?: any) => {
+        if (key === "embeddings:orchestrator.failedDuringInitialScan" && params?.errorMessage) {
+            return `Failed during initial scan: ${params.errorMessage}`
+        }
+        return key
+    },
+}))
+
+describe("CodeIndexOrchestrator - error path cleanup gating", () => {
+    const workspacePath = "/test/workspace"
+
+    let configManager: any
+    let stateManager: any
+    let cacheManager: any
+    let vectorStore: any
+    let scanner: any
+    let fileWatcher: any
+
+    beforeEach(() => {
+        vi.clearAllMocks()
+
+        configManager = {
+            isFeatureConfigured: true,
+        }
+
+        // Minimal state manager that tracks state transitions
+        let currentState = "Standby"
+        stateManager = {
+            get state() {
+                return currentState
+            },
+            setSystemState: vi.fn().mockImplementation((state: string, _msg: string) => {
+                currentState = state
+            }),
+            reportFileQueueProgress: vi.fn(),
+            reportBlockIndexingProgress: vi.fn(),
+        }
+
+        cacheManager = {
+            clearCacheFile: vi.fn().mockResolvedValue(undefined),
+        }
+
+        vectorStore = {
+            initialize: vi.fn(),
+            hasIndexedData: vi.fn(),
+            markIndexingIncomplete: vi.fn(),
+            markIndexingComplete: vi.fn(),
+            clearCollection: vi.fn().mockResolvedValue(undefined),
+        }
+
+        scanner = {
+            scanDirectory: vi.fn(),
+        }
+
+        fileWatcher = {
+            initialize: vi.fn().mockResolvedValue(undefined),
+            onDidStartBatchProcessing: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+            onBatchProgressUpdate: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+            onDidFinishBatchProcessing: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+            dispose: vi.fn(),
+        }
+    })
+
+    it("should not call clearCollection() or clear cache when initialize() fails (indexing not started)", async () => {
+        // Arrange: fail at initialize()
+        vectorStore.initialize.mockRejectedValue(new Error("Qdrant unreachable"))
+
+        const orchestrator = new CodeIndexOrchestrator(
+            configManager,
+            stateManager,
+            workspacePath,
+            cacheManager,
+            vectorStore,
+            scanner,
+            fileWatcher,
+        )
+
+        // Act
+        await orchestrator.startIndexing()
+
+        // Assert
+        expect(vectorStore.clearCollection).not.toHaveBeenCalled()
+        expect(cacheManager.clearCacheFile).not.toHaveBeenCalled()
+
+        // Error state should be set
+        expect(stateManager.setSystemState).toHaveBeenCalled()
+        const lastCall = stateManager.setSystemState.mock.calls[stateManager.setSystemState.mock.calls.length - 1]
+        expect(lastCall[0]).toBe("Error")
+    })
+
+    it("should call clearCollection() and clear cache when an error occurs after initialize() succeeds (indexing started)", async () => {
+        // Arrange: initialize succeeds; fail soon after to enter error path with indexingStarted=true
+        vectorStore.initialize.mockResolvedValue(false) // existing collection
+        vectorStore.hasIndexedData.mockResolvedValue(false) // force full scan path
+        vectorStore.markIndexingIncomplete.mockRejectedValue(new Error("mark incomplete failure"))
+
+        const orchestrator = new CodeIndexOrchestrator(
+            configManager,
+            stateManager,
+            workspacePath,
+            cacheManager,
+            vectorStore,
+            scanner,
+            fileWatcher,
+        )
+
+        // Act
+        await orchestrator.startIndexing()
+
+        // Assert: cleanup gated behind indexingStarted should have happened
+        expect(vectorStore.clearCollection).toHaveBeenCalledTimes(1)
+        expect(cacheManager.clearCacheFile).toHaveBeenCalledTimes(1)
+
+        // Error state should be set
+        expect(stateManager.setSystemState).toHaveBeenCalled()
+        const lastCall = stateManager.setSystemState.mock.calls[stateManager.setSystemState.mock.calls.length - 1]
+        expect(lastCall[0]).toBe("Error")
+    })
+})
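Reviewer note: both tests finish by reading the last `setSystemState` call. If that pattern spreads to more specs, it could be pulled into a tiny helper; a sketch (illustrative only, not part of this PR):

```ts
// Sketch: helper for reading the most recent setSystemState call in these tests.
// `setSystemState` is any vitest mock function; the helper is illustrative only.
import type { Mock } from "vitest"

function lastSystemState(setSystemState: Mock): string | undefined {
    const calls = setSystemState.mock.calls
    return calls.length > 0 ? (calls[calls.length - 1][0] as string) : undefined
}

// Usage inside a test:
// expect(lastSystemState(stateManager.setSystemState)).toBe("Error")
```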
diff --git a/src/services/code-index/interfaces/vector-store.ts b/src/services/code-index/interfaces/vector-store.ts
index dde602fb4d9a..7946563fd57f 100644
--- a/src/services/code-index/interfaces/vector-store.ts
+++ b/src/services/code-index/interfaces/vector-store.ts
@@ -62,6 +62,24 @@ export interface IVectorStore {
      * @returns Promise resolving to boolean indicating if the collection exists
      */
     collectionExists(): Promise<boolean>
+
+    /**
+     * Checks if the collection exists and has indexed points
+     * @returns Promise resolving to boolean indicating if the collection exists and has points
+     */
+    hasIndexedData(): Promise<boolean>
+
+    /**
+     * Marks the indexing process as complete by storing metadata
+     * Should be called after a successful full workspace scan or incremental scan
+     */
+    markIndexingComplete(): Promise<void>
+
+    /**
+     * Marks the indexing process as incomplete by storing metadata
+     * Should be called at the start of indexing to indicate work in progress
+     */
+    markIndexingIncomplete(): Promise<void>
 }
 
 export interface VectorStoreSearchResult {
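Reviewer note: the three new members form a small contract: `markIndexingIncomplete()` at scan start, `markIndexingComplete()` on success, and `hasIndexedData()` to decide whether a full scan is needed. An in-memory stub, illustrative only and assuming this reading of the contract, makes the interplay concrete:

```ts
// Illustrative only: an in-memory stub of the new IVectorStore members.
// The real implementation lives in QdrantVectorStore.
class InMemoryIndexMetadata {
    private points = 0
    private indexingComplete = false

    addPoints(count: number): void {
        this.points += count
    }

    async hasIndexedData(): Promise<boolean> {
        // Data exists *and* the last run finished cleanly.
        return this.points > 0 && this.indexingComplete
    }

    async markIndexingIncomplete(): Promise<void> {
        this.indexingComplete = false
    }

    async markIndexingComplete(): Promise<void> {
        this.indexingComplete = true
    }
}
```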
diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts
index fbc4a2411850..99f317882b84 100644
--- a/src/services/code-index/orchestrator.ts
+++ b/src/services/code-index/orchestrator.ts
@@ -123,86 +123,164 @@ export class CodeIndexOrchestrator {
         this._isProcessing = true
         this.stateManager.setSystemState("Indexing", "Initializing services...")
 
+        // Track whether we successfully connected to Qdrant and started indexing
+        // This helps us decide whether to preserve cache on error
+        let indexingStarted = false
+
         try {
             const collectionCreated = await this.vectorStore.initialize()
 
+            // Successfully connected to Qdrant
+            indexingStarted = true
+
             if (collectionCreated) {
                 await this.cacheManager.clearCacheFile()
             }
 
-            this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")
+            // Check if the collection already has indexed data
+            // If it does, we can skip the full scan and just start the watcher
+            const hasExistingData = await this.vectorStore.hasIndexedData()
 
-            let cumulativeBlocksIndexed = 0
-            let cumulativeBlocksFoundSoFar = 0
-            let batchErrors: Error[] = []
+            if (hasExistingData && !collectionCreated) {
+                // Collection exists with data - run incremental scan to catch any new/changed files
+                // This handles files added while workspace was closed or Qdrant was inactive
+                console.log(
+                    "[CodeIndexOrchestrator] Collection already has indexed data. Running incremental scan for new/changed files...",
+                )
+                this.stateManager.setSystemState("Indexing", "Checking for new or modified files...")
 
-            const handleFileParsed = (fileBlockCount: number) => {
-                cumulativeBlocksFoundSoFar += fileBlockCount
-                this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
-            }
+                // Mark as incomplete at the start of incremental scan
+                await this.vectorStore.markIndexingIncomplete()
 
-            const handleBlocksIndexed = (indexedCount: number) => {
-                cumulativeBlocksIndexed += indexedCount
-                this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
-            }
+                let cumulativeBlocksIndexed = 0
+                let cumulativeBlocksFoundSoFar = 0
+                let batchErrors: Error[] = []
 
-            const result = await this.scanner.scanDirectory(
-                this.workspacePath,
-                (batchError: Error) => {
-                    console.error(
-                        `[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
-                        batchError,
-                    )
-                    batchErrors.push(batchError)
-                },
-                handleBlocksIndexed,
-                handleFileParsed,
-            )
+                const handleFileParsed = (fileBlockCount: number) => {
+                    cumulativeBlocksFoundSoFar += fileBlockCount
+                    this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+                }
 
-            if (!result) {
-                throw new Error("Scan failed, is scanner initialized?")
-            }
+                const handleBlocksIndexed = (indexedCount: number) => {
+                    cumulativeBlocksIndexed += indexedCount
+                    this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+                }
 
-            const { stats } = result
+                // Run incremental scan - scanner will skip unchanged files using cache
+                const result = await this.scanner.scanDirectory(
+                    this.workspacePath,
+                    (batchError: Error) => {
+                        console.error(
+                            `[CodeIndexOrchestrator] Error during incremental scan batch: ${batchError.message}`,
+                            batchError,
+                        )
+                        batchErrors.push(batchError)
+                    },
+                    handleBlocksIndexed,
+                    handleFileParsed,
+                )
 
-            // Check if any blocks were actually indexed successfully
-            // If no blocks were indexed but blocks were found, it means all batches failed
-            if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
-                if (batchErrors.length > 0) {
-                    // Use the first batch error as it's likely representative of the main issue
-                    const firstError = batchErrors[0]
-                    throw new Error(`Indexing failed: ${firstError.message}`)
+                if (!result) {
+                    throw new Error("Incremental scan failed, is scanner initialized?")
+                }
+
+                // If new files were found and indexed, log the results
+                if (cumulativeBlocksFoundSoFar > 0) {
+                    console.log(
+                        `[CodeIndexOrchestrator] Incremental scan completed: ${cumulativeBlocksIndexed} blocks indexed from new/changed files`,
+                    )
                 } else {
-                    throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
+                    console.log("[CodeIndexOrchestrator] No new or changed files found")
                 }
-            }
 
-            // Check for partial failures - if a significant portion of blocks failed
-            const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
-            if (batchErrors.length > 0 && failureRate > 0.1) {
-                // More than 10% of blocks failed to index
-                const firstError = batchErrors[0]
-                throw new Error(
-                    `Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
+                await this._startWatcher()
+
+                // Mark indexing as complete after successful incremental scan
+                await this.vectorStore.markIndexingComplete()
+
+                this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
+            } else {
+                // No existing data or collection was just created - do a full scan
+                this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")
+
+                // Mark as incomplete at the start of full scan
+                await this.vectorStore.markIndexingIncomplete()
+
+                let cumulativeBlocksIndexed = 0
+                let cumulativeBlocksFoundSoFar = 0
+                let batchErrors: Error[] = []
+
+                const handleFileParsed = (fileBlockCount: number) => {
+                    cumulativeBlocksFoundSoFar += fileBlockCount
+                    this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+                }
+
+                const handleBlocksIndexed = (indexedCount: number) => {
+                    cumulativeBlocksIndexed += indexedCount
+                    this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+                }
+
+                const result = await this.scanner.scanDirectory(
+                    this.workspacePath,
+                    (batchError: Error) => {
+                        console.error(
+                            `[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
+                            batchError,
+                        )
+                        batchErrors.push(batchError)
+                    },
+                    handleBlocksIndexed,
+                    handleFileParsed,
                 )
-            }
 
-            // CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
-            // this is a complete failure regardless of the failure rate calculation
-            if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
-                const firstError = batchErrors[0]
-                throw new Error(`Indexing failed completely: ${firstError.message}`)
-            }
+                if (!result) {
+                    throw new Error("Scan failed, is scanner initialized?")
+                }
 
-            // Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
-            // this is still a failure
-            if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
-                throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
-            }
+                const { stats } = result
+
+                // Check if any blocks were actually indexed successfully
+                // If no blocks were indexed but blocks were found, it means all batches failed
+                if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
+                    if (batchErrors.length > 0) {
+                        // Use the first batch error as it's likely representative of the main issue
+                        const firstError = batchErrors[0]
+                        throw new Error(`Indexing failed: ${firstError.message}`)
+                    } else {
+                        throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
+                    }
+                }
+
+                // Check for partial failures - if a significant portion of blocks failed
+                const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
+                if (batchErrors.length > 0 && failureRate > 0.1) {
+                    // More than 10% of blocks failed to index
+                    const firstError = batchErrors[0]
+                    throw new Error(
+                        `Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
+                    )
+                }
+
+                // CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
+                // this is a complete failure regardless of the failure rate calculation
+                if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
+                    const firstError = batchErrors[0]
+                    throw new Error(`Indexing failed completely: ${firstError.message}`)
+                }
 
-            await this._startWatcher()
+                // Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
+                // this is still a failure
+                if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
+                    throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
+                }
+
+                await this._startWatcher()
 
-            this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
+                // Mark indexing as complete after successful full scan
+                await this.vectorStore.markIndexingComplete()
+
+                this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
+            }
         } catch (error: any) {
             console.error("[CodeIndexOrchestrator] Error during indexing:", error)
             TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
@@ -210,18 +288,33 @@
                 error: error instanceof Error ? error.message : String(error),
                 stack: error instanceof Error ? error.stack : undefined,
                 location: "startIndexing",
             })
-            try {
-                await this.vectorStore.clearCollection()
-            } catch (cleanupError) {
-                console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError)
-                TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
-                    error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
-                    stack: cleanupError instanceof Error ? cleanupError.stack : undefined,
-                    location: "startIndexing.cleanup",
-                })
+            if (indexingStarted) {
+                try {
+                    await this.vectorStore.clearCollection()
+                } catch (cleanupError) {
+                    console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError)
+                    TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
+                        error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
+                        stack: cleanupError instanceof Error ? cleanupError.stack : undefined,
+                        location: "startIndexing.cleanup",
+                    })
+                }
             }
-            await this.cacheManager.clearCacheFile()
+            // Only clear cache if indexing had started (Qdrant connection succeeded)
+            // If we never connected to Qdrant, preserve cache for incremental scan when it comes back
+            if (indexingStarted) {
+                // Indexing started but failed mid-way - clear cache to avoid cache-Qdrant mismatch
+                await this.cacheManager.clearCacheFile()
+                console.log(
+                    "[CodeIndexOrchestrator] Indexing failed after starting. Clearing cache to avoid inconsistency.",
+                )
+            } else {
+                // Never connected to Qdrant - preserve cache for future incremental scan
+                console.log(
+                    "[CodeIndexOrchestrator] Failed to connect to Qdrant. Preserving cache for future incremental scan.",
+                )
+            }
 
             this.stateManager.setSystemState(
                 "Error",
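Reviewer note: the hunk above is large mostly because the old scan body now appears twice (incremental branch and full-scan branch). The underlying decision tree is small; here is a condensed sketch with the collaborators reduced to hypothetical minimal shapes (not the production classes, and it rethrows instead of setting the Error state):

```ts
// Sketch of the orchestrator's new decision tree; not the production code.
interface VectorStoreLike {
    initialize(): Promise<boolean> // true => collection was just created
    hasIndexedData(): Promise<boolean>
    markIndexingIncomplete(): Promise<void>
    markIndexingComplete(): Promise<void>
    clearCollection(): Promise<void>
}

type ScanKind = "incremental" | "full"

async function planAndRun(
    store: VectorStoreLike,
    runScan: (kind: ScanKind) => Promise<void>,
    clearCache: () => Promise<void>,
): Promise<void> {
    let indexingStarted = false
    try {
        const collectionCreated = await store.initialize()
        indexingStarted = true // we reached Qdrant

        const hasExistingData = await store.hasIndexedData()
        const kind: ScanKind = hasExistingData && !collectionCreated ? "incremental" : "full"

        await store.markIndexingIncomplete()
        await runScan(kind)
        await store.markIndexingComplete()
    } catch (error) {
        if (indexingStarted) {
            // Mid-flight failure: drop the partial collection and the cache together.
            await store.clearCollection()
            await clearCache()
        }
        // If Qdrant was never reached, collection and cache stay untouched so a later
        // incremental scan can resume cheaply.
        throw error
    }
}
```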
diff --git a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
index 8947c2f3e79e..439962862b1a 100644
--- a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
+++ b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
@@ -1260,9 +1260,9 @@ describe("QdrantVectorStore", () => {
             const results = await vectorStore.search(queryVector)
 
             expect(mockQdrantClientInstance.query).toHaveBeenCalledTimes(1)
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs).toMatchObject({
                 query: queryVector,
-                filter: undefined,
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1273,6 +1273,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
 
             expect(results).toEqual(mockQdrantResults.points)
         })
@@ -1300,29 +1303,20 @@
             const results = await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs2 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs2).toMatchObject({
                 query: queryVector,
-                filter: {
-                    must: [
-                        {
-                            key: "pathSegments.0",
-                            match: { value: "src" },
-                        },
-                        {
-                            key: "pathSegments.1",
-                            match: { value: "components" },
-                        },
-                    ],
-                },
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
-                params: {
-                    hnsw_ef: 128,
-                    exact: false,
-                },
-                with_payload: {
-                    include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
-                },
+                params: { hnsw_ef: 128, exact: false },
+                with_payload: { include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"] },
+            })
+            expect(callArgs2.filter).toEqual({
+                must: [
+                    { key: "pathSegments.0", match: { value: "src" } },
+                    { key: "pathSegments.1", match: { value: "components" } },
+                ],
+                must_not: [{ key: "type", match: { value: "metadata" } }],
             })
 
             expect(results).toEqual(mockQdrantResults.points)
@@ -1337,9 +1331,9 @@
             await vectorStore.search(queryVector, undefined, customMinScore)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs3 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs3).toMatchObject({
                 query: queryVector,
-                filter: undefined,
                 score_threshold: customMinScore,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1350,6 +1344,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs3.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
         })
 
         it("should use custom maxResults when provided", async () => {
@@ -1361,9 +1358,9 @@
             await vectorStore.search(queryVector, undefined, undefined, customMaxResults)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs4 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs4).toMatchObject({
                 query: queryVector,
-                filter: undefined,
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: customMaxResults,
                 params: {
@@ -1374,6 +1371,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs4.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
         })
 
         it("should filter out results with invalid payloads", async () => {
@@ -1489,28 +1489,9 @@
             await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs5 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs5).toMatchObject({
                 query: queryVector,
-                filter: {
-                    must: [
-                        {
-                            key: "pathSegments.0",
-                            match: { value: "src" },
-                        },
-                        {
-                            key: "pathSegments.1",
-                            match: { value: "components" },
-                        },
-                        {
-                            key: "pathSegments.2",
-                            match: { value: "ui" },
-                        },
-                        {
-                            key: "pathSegments.3",
-                            match: { value: "forms" },
-                        },
-                    ],
-                },
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1521,6 +1502,15 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs5.filter).toEqual({
+                must: [
+                    { key: "pathSegments.0", match: { value: "src" } },
+                    { key: "pathSegments.1", match: { value: "components" } },
+                    { key: "pathSegments.2", match: { value: "ui" } },
+                    { key: "pathSegments.3", match: { value: "forms" } },
+                ],
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
         })
 
         it("should handle error scenarios when qdrantClient.query fails", async () => {
@@ -1573,9 +1563,9 @@
             const results = await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs7 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs7).toMatchObject({
                 query: queryVector,
-                filter: undefined, // Should be undefined for current directory
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1586,6 +1576,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs7.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
 
             expect(results).toEqual(mockQdrantResults.points)
         })
@@ -1599,9 +1592,9 @@
             await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs6 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs6).toMatchObject({
                 query: queryVector,
-                filter: undefined, // Should be undefined for current directory
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1612,6 +1605,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs6.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
         })
 
         it("should not apply filter when directoryPrefix is empty string", async () => {
@@ -1623,9 +1619,9 @@
             await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs8 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs8).toMatchObject({
                 query: queryVector,
-                filter: undefined, // Should be undefined for empty string
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1636,6 +1632,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs8.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
         })
 
         it("should not apply filter when directoryPrefix is '.\\' (Windows style)", async () => {
@@ -1647,9 +1646,9 @@
             await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs9 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs9).toMatchObject({
                 query: queryVector,
-                filter: undefined, // Should be undefined for Windows current directory
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1660,6 +1659,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs9.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
         })
 
         it("should not apply filter when directoryPrefix has trailing slashes", async () => {
@@ -1671,9 +1673,9 @@
             await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs10 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs10).toMatchObject({
                 query: queryVector,
-                filter: undefined, // Should be undefined after normalizing trailing slashes
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1684,6 +1686,9 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs10.filter).toEqual({
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            })
         })
 
         it("should still apply filter for relative paths like './src'", async () => {
@@ -1695,16 +1700,9 @@
             await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs11 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs11).toMatchObject({
                 query: queryVector,
-                filter: {
-                    must: [
-                        {
-                            key: "pathSegments.0",
-                            match: { value: "src" },
-                        },
-                    ],
-                }, // Should normalize "./src" to "src"
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1715,6 +1713,15 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs11.filter).toEqual({
+                must: [
+                    {
+                        key: "pathSegments.0",
+                        match: { value: "src" },
+                    },
+                ],
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            }) // Should normalize "./src" to "src"
         })
 
         it("should still apply filter for regular directory paths", async () => {
@@ -1726,16 +1733,9 @@
             await vectorStore.search(queryVector, directoryPrefix)
 
-            expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+            const callArgs12 = mockQdrantClientInstance.query.mock.calls[0][1]
+            expect(callArgs12).toMatchObject({
                 query: queryVector,
-                filter: {
-                    must: [
-                        {
-                            key: "pathSegments.0",
-                            match: { value: "src" },
-                        },
-                    ],
-                }, // Should still create filter for regular paths
                 score_threshold: DEFAULT_SEARCH_MIN_SCORE,
                 limit: DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -1746,6 +1746,15 @@
                     include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
                 },
             })
+            expect(callArgs12.filter).toEqual({
+                must: [
+                    {
+                        key: "pathSegments.0",
+                        match: { value: "src" },
+                    },
+                ],
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            }) // Should still create filter for regular paths
         })
     })
 })
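Reviewer note: every assertion above expects the same `must_not` clause because the client now appends a metadata-exclusion condition to whatever directory filter it builds. The merge, sketched as a standalone helper (hypothetical; in the PR the logic is inline in `QdrantVectorStore.search`):

```ts
// Illustrative helper mirroring the merge performed inside search(); not exported by the PR.
type QdrantCondition = { key: string; match: { value: string } }
type QdrantFilter = { must?: QdrantCondition[]; must_not?: QdrantCondition[] }

const METADATA_EXCLUSION: QdrantCondition = { key: "type", match: { value: "metadata" } }

function withMetadataExclusion(filter?: QdrantFilter): QdrantFilter {
    if (!filter) {
        return { must_not: [METADATA_EXCLUSION] }
    }
    return { ...filter, must_not: [...(filter.must_not ?? []), METADATA_EXCLUSION] }
}

// e.g. withMetadataExclusion({ must: [{ key: "pathSegments.0", match: { value: "src" } }] })
// => { must: [...], must_not: [{ key: "type", match: { value: "metadata" } }] }
```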
diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts
index ce152824a730..1efd9f8b3a18 100644
--- a/src/services/code-index/vector-store/qdrant-client.ts
+++ b/src/services/code-index/vector-store/qdrant-client.ts
@@ -1,10 +1,10 @@
 import { QdrantClient, Schemas } from "@qdrant/js-client-rest"
 import { createHash } from "crypto"
 import * as path from "path"
-import { getWorkspacePath } from "../../../utils/path"
+import { v5 as uuidv5 } from "uuid"
 import { IVectorStore } from "../interfaces/vector-store"
 import { Payload, VectorStoreSearchResult } from "../interfaces"
-import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../constants"
+import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE, QDRANT_CODE_BLOCK_NAMESPACE } from "../constants"
 import { t } from "../../../i18n"
 
 /**
@@ -386,7 +386,12 @@ export class QdrantVectorStore implements IVectorStore {
         maxResults?: number,
     ): Promise<VectorStoreSearchResult[]> {
         try {
-            let filter = undefined
+            let filter:
+                | {
+                      must: Array<{ key: string; match: { value: string } }>
+                      must_not?: Array<{ key: string; match: { value: string } }>
+                  }
+                | undefined = undefined
 
             if (directoryPrefix) {
                 // Check if the path represents current directory
@@ -412,9 +417,18 @@ export class QdrantVectorStore implements IVectorStore {
                 }
             }
 
+            // Always exclude metadata points at query-time to avoid wasting top-k
+            const metadataExclusion = {
+                must_not: [{ key: "type", match: { value: "metadata" } }],
+            }
+
+            const mergedFilter = filter
+                ? { ...filter, must_not: [...(filter.must_not || []), ...metadataExclusion.must_not] }
+                : metadataExclusion
+
             const searchRequest = {
                 query: queryVector,
-                filter,
+                filter: mergedFilter,
                 score_threshold: minScore ?? DEFAULT_SEARCH_MIN_SCORE,
                 limit: maxResults ?? DEFAULT_MAX_SEARCH_RESULTS,
                 params: {
@@ -548,4 +562,106 @@ export class QdrantVectorStore implements IVectorStore {
         const collectionInfo = await this.getCollectionInfo()
         return collectionInfo !== null
     }
+
+    /**
+     * Checks if the collection exists and has indexed points
+     * @returns Promise resolving to boolean indicating if the collection exists and has points
+     */
+    async hasIndexedData(): Promise<boolean> {
+        try {
+            const collectionInfo = await this.getCollectionInfo()
+            if (!collectionInfo) {
+                return false
+            }
+            // Check if the collection has any points indexed
+            const pointsCount = collectionInfo.points_count ?? 0
+            if (pointsCount === 0) {
+                return false
+            }
+
+            // Check if the indexing completion marker exists
+            // Use a deterministic UUID generated from a constant string
+            const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
+            const metadataPoints = await this.client.retrieve(this.collectionName, {
+                ids: [metadataId],
+            })
+
+            // If marker exists, use it to determine completion status
+            if (metadataPoints.length > 0) {
+                return metadataPoints[0].payload?.indexing_complete === true
+            }
+
+            // Backward compatibility: No marker exists (old index or pre-marker version)
+            // Fall back to old logic - assume complete if collection has points
+            console.log(
+                "[QdrantVectorStore] No indexing metadata marker found. Using backward compatibility mode (checking points_count > 0).",
+            )
+            return pointsCount > 0
+        } catch (error) {
+            console.warn("[QdrantVectorStore] Failed to check if collection has data:", error)
+            return false
+        }
+    }
+
+    /**
+     * Marks the indexing process as complete by storing metadata
+     * Should be called after a successful full workspace scan or incremental scan
+     */
+    async markIndexingComplete(): Promise<void> {
+        try {
+            // Create a metadata point with a deterministic UUID to mark indexing as complete
+            // Use uuidv5 to generate a consistent UUID from a constant string
+            const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
+
+            await this.client.upsert(this.collectionName, {
+                points: [
+                    {
+                        id: metadataId,
+                        vector: new Array(this.vectorSize).fill(0),
+                        payload: {
+                            type: "metadata",
+                            indexing_complete: true,
+                            completed_at: Date.now(),
+                        },
+                    },
+                ],
+                wait: true,
+            })
+            console.log("[QdrantVectorStore] Marked indexing as complete")
+        } catch (error) {
+            console.error("[QdrantVectorStore] Failed to mark indexing as complete:", error)
+            throw error
+        }
+    }
+
+    /**
+     * Marks the indexing process as incomplete by storing metadata
+     * Should be called at the start of indexing to indicate work in progress
+     */
+    async markIndexingIncomplete(): Promise<void> {
+        try {
+            // Create a metadata point with a deterministic UUID to mark indexing as incomplete
+            // Use uuidv5 to generate a consistent UUID from a constant string
+            const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
+
+            await this.client.upsert(this.collectionName, {
+                points: [
+                    {
+                        id: metadataId,
+                        vector: new Array(this.vectorSize).fill(0),
+                        payload: {
+                            type: "metadata",
+                            indexing_complete: false,
+                            started_at: Date.now(),
+                        },
+                    },
+                ],
+                wait: true,
+            })
+            console.log("[QdrantVectorStore] Marked indexing as incomplete (in progress)")
+        } catch (error) {
+            console.error("[QdrantVectorStore] Failed to mark indexing as incomplete:", error)
+            throw error
+        }
+    }
 }
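Reviewer note: the completion marker works because `uuidv5` yields the same point ID on every run, so reads and writes always target one well-known point. A small round-trip sketch against a raw `QdrantClient`; the collection name, vector size, and namespace value below are placeholders, not the PR's constants:

```ts
import { QdrantClient } from "@qdrant/js-client-rest"
import { v5 as uuidv5 } from "uuid"

// Placeholder values for illustration; the real code uses the store's own
// collection name, vector size, and the QDRANT_CODE_BLOCK_NAMESPACE constant.
const NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479"
const COLLECTION = "example-collection"
const VECTOR_SIZE = 1536

const metadataId = uuidv5("__indexing_metadata__", NAMESPACE) // same ID every run

async function readIndexingComplete(client: QdrantClient): Promise<boolean | undefined> {
    const points = await client.retrieve(COLLECTION, { ids: [metadataId] })
    // undefined => no marker yet (older index); caller falls back to points_count
    return points.length > 0 ? points[0].payload?.indexing_complete === true : undefined
}

async function writeIndexingComplete(client: QdrantClient, complete: boolean): Promise<void> {
    await client.upsert(COLLECTION, {
        wait: true,
        points: [
            {
                id: metadataId,
                vector: new Array(VECTOR_SIZE).fill(0), // dummy vector; only the payload matters
                payload: { type: "metadata", indexing_complete: complete },
            },
        ],
    })
}
```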