From da4c8ec2ccd152e9f74e2bf68de7f93fac437ca1 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Sat, 19 Jul 2025 03:14:41 +0000 Subject: [PATCH] fix: prevent duplicated codebase indexing on VSCode restart - Add hasData() method to IVectorStore interface and QdrantVectorStore implementation - Check for existing index data in orchestrator before performing full scan - Update manager initialization logic to only trigger indexing when necessary - Skip re-indexing if valid index with data already exists Fixes #5941 --- .../code-index/__tests__/manager.spec.ts | 2 +- .../code-index/interfaces/vector-store.ts | 6 +++++ src/services/code-index/manager.ts | 17 +++++++------- src/services/code-index/orchestrator.ts | 11 ++++++++++ .../processors/__tests__/file-watcher.spec.ts | 1 + .../processors/__tests__/scanner.spec.ts | 1 + .../code-index/vector-store/qdrant-client.ts | 22 +++++++++++++++++++ 7 files changed, 51 insertions(+), 9 deletions(-) diff --git a/src/services/code-index/__tests__/manager.spec.ts b/src/services/code-index/__tests__/manager.spec.ts index 8c64c2fdc62..5d5265624d9 100644 --- a/src/services/code-index/__tests__/manager.spec.ts +++ b/src/services/code-index/__tests__/manager.spec.ts @@ -249,7 +249,7 @@ describe("CodeIndexManager - handleSettingsChange regression", () => { beforeEach(() => { // Mock service factory objects mockEmbedder = { embedderInfo: { name: "openai" } } - mockVectorStore = {} + mockVectorStore = { hasData: vi.fn().mockResolvedValue(false) } mockScanner = {} mockFileWatcher = { onDidStartBatchProcessing: vi.fn(), diff --git a/src/services/code-index/interfaces/vector-store.ts b/src/services/code-index/interfaces/vector-store.ts index dde602fb4d9..9951856d396 100644 --- a/src/services/code-index/interfaces/vector-store.ts +++ b/src/services/code-index/interfaces/vector-store.ts @@ -62,6 +62,12 @@ export interface IVectorStore { * @returns Promise resolving to boolean indicating if the collection exists */ collectionExists(): Promise + + 
/** + * Checks if the collection has any data (points) + * @returns Promise resolving to boolean indicating if the collection has data + */ + hasData(): Promise<boolean> } export interface VectorStoreSearchResult { diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 18e0752c34d..30be9080092 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -142,14 +142,15 @@ export class CodeIndexManager { } // 5. Handle Indexing Start/Restart - // The enhanced vectorStore.initialize() in startIndexing() now handles dimension changes automatically - // by detecting incompatible collections and recreating them, so we rely on that for dimension changes - const shouldStartOrRestartIndexing = - requiresRestart || - (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) - - if (shouldStartOrRestartIndexing) { - this._orchestrator?.startIndexing() // This method is async, but we don't await it here + // Only start indexing if: + // - Configuration requires restart (settings changed) + // - Or this is the first initialization (no orchestrator exists yet) + // The orchestrator's startIndexing method will check if an existing index exists + // and skip the full scan if data is already present + const shouldStartIndexing = requiresRestart || !this._orchestrator + + if (shouldStartIndexing && this._orchestrator) { + this._orchestrator.startIndexing() // This method is async, but we don't await it here } return { requiresRestart } diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts index 505aee76684..bd4f61b6f93 100644 --- a/src/services/code-index/orchestrator.ts +++ b/src/services/code-index/orchestrator.ts @@ -122,6 +122,17 @@ export class CodeIndexOrchestrator { await this.cacheManager.clearCacheFile() } + // Check if we have an existing index with data + const hasExistingData = !collectionCreated && (await this.vectorStore.hasData()) + + 
if (hasExistingData) { + console.log("[CodeIndexOrchestrator] Existing index found with data. Skipping initial scan.") + // Start the file watcher without doing a full scan + await this._startWatcher() + this.stateManager.setSystemState("Indexed", "Using existing index. File watcher started.") + return + } + this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...") let cumulativeBlocksIndexed = 0 diff --git a/src/services/code-index/processors/__tests__/file-watcher.spec.ts b/src/services/code-index/processors/__tests__/file-watcher.spec.ts index 2a3b7e11677..2c2dcdf2fc6 100644 --- a/src/services/code-index/processors/__tests__/file-watcher.spec.ts +++ b/src/services/code-index/processors/__tests__/file-watcher.spec.ts @@ -118,6 +118,7 @@ describe("FileWatcher", () => { upsertPoints: vi.fn().mockResolvedValue(undefined), deletePointsByFilePath: vi.fn().mockResolvedValue(undefined), deletePointsByMultipleFilePaths: vi.fn().mockResolvedValue(undefined), + hasData: vi.fn().mockResolvedValue(false), } mockIgnoreInstance = { diff --git a/src/services/code-index/processors/__tests__/scanner.spec.ts b/src/services/code-index/processors/__tests__/scanner.spec.ts index 4d4150b4439..b1f89c1dba0 100644 --- a/src/services/code-index/processors/__tests__/scanner.spec.ts +++ b/src/services/code-index/processors/__tests__/scanner.spec.ts @@ -88,6 +88,7 @@ describe("DirectoryScanner", () => { clearCollection: vi.fn().mockResolvedValue(undefined), deleteCollection: vi.fn().mockResolvedValue(undefined), collectionExists: vi.fn().mockResolvedValue(true), + hasData: vi.fn().mockResolvedValue(false), } mockCodeParser = { parseFile: vi.fn().mockResolvedValue([]), diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index 5121d65b97c..b24d368b7cc 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -488,4 
+488,26 @@ export class QdrantVectorStore implements IVectorStore { const collectionInfo = await this.getCollectionInfo() return collectionInfo !== null } + + /** + * Checks if the collection has any data (points) + * @returns Promise resolving to boolean indicating if the collection has data + */ + async hasData(): Promise<boolean> { + try { + const collectionInfo = await this.getCollectionInfo() + if (!collectionInfo) { + return false + } + + // Check if the collection has any points + // The collection info includes points_count or vectors_count depending on the version + const pointsCount = (collectionInfo as any).points_count || (collectionInfo as any).vectors_count || 0 + + return pointsCount > 0 + } catch (error) { + console.warn(`[QdrantVectorStore] Error checking if collection has data:`, error) + return false + } + } }