Skip to content

Commit da4c8ec

Browse files
committed
fix: prevent duplicated codebase indexing on VSCode restart
- Add hasData() method to IVectorStore interface and QdrantVectorStore implementation
- Check for existing index data in orchestrator before performing full scan
- Update manager initialization logic to only trigger indexing when necessary
- Skip re-indexing if a valid index with data already exists

Fixes #5941
1 parent 64d35dc commit da4c8ec

File tree

7 files changed

+51
-9
lines changed

7 files changed

+51
-9
lines changed

src/services/code-index/__tests__/manager.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ describe("CodeIndexManager - handleSettingsChange regression", () => {
249249
beforeEach(() => {
250250
// Mock service factory objects
251251
mockEmbedder = { embedderInfo: { name: "openai" } }
252-
mockVectorStore = {}
252+
mockVectorStore = { hasData: vi.fn().mockResolvedValue(false) }
253253
mockScanner = {}
254254
mockFileWatcher = {
255255
onDidStartBatchProcessing: vi.fn(),

src/services/code-index/interfaces/vector-store.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ export interface IVectorStore {
6262
* @returns Promise resolving to boolean indicating if the collection exists
6363
*/
6464
collectionExists(): Promise<boolean>
65+
66+
/**
67+
* Checks if the collection has any data (points)
68+
* @returns Promise resolving to boolean indicating if the collection has data
69+
*/
70+
hasData(): Promise<boolean>
6571
}
6672

6773
export interface VectorStoreSearchResult {

src/services/code-index/manager.ts

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,15 @@ export class CodeIndexManager {
142142
}
143143

144144
// 5. Handle Indexing Start/Restart
145-
// The enhanced vectorStore.initialize() in startIndexing() now handles dimension changes automatically
146-
// by detecting incompatible collections and recreating them, so we rely on that for dimension changes
147-
const shouldStartOrRestartIndexing =
148-
requiresRestart ||
149-
(needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing"))
150-
151-
if (shouldStartOrRestartIndexing) {
152-
this._orchestrator?.startIndexing() // This method is async, but we don't await it here
145+
// Only start indexing if:
146+
// - Configuration requires restart (settings changed)
147+
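// - Or this is the first initialization (no orchestrator exists yet) -- NOTE(review): the guard below also requires this._orchestrator, so this case never actually starts indexing here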
148+
// The orchestrator's startIndexing method will check if an existing index exists
149+
// and skip the full scan if data is already present
150+
const shouldStartIndexing = requiresRestart || !this._orchestrator
151+
152+
if (shouldStartIndexing && this._orchestrator) {
153+
this._orchestrator.startIndexing() // This method is async, but we don't await it here
153154
}
154155

155156
return { requiresRestart }

src/services/code-index/orchestrator.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,17 @@ export class CodeIndexOrchestrator {
122122
await this.cacheManager.clearCacheFile()
123123
}
124124

125+
// Check if we have an existing index with data
126+
const hasExistingData = !collectionCreated && (await this.vectorStore.hasData())
127+
128+
if (hasExistingData) {
129+
console.log("[CodeIndexOrchestrator] Existing index found with data. Skipping initial scan.")
130+
// Start the file watcher without doing a full scan
131+
await this._startWatcher()
132+
this.stateManager.setSystemState("Indexed", "Using existing index. File watcher started.")
133+
return
134+
}
135+
125136
this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")
126137

127138
let cumulativeBlocksIndexed = 0

src/services/code-index/processors/__tests__/file-watcher.spec.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ describe("FileWatcher", () => {
118118
upsertPoints: vi.fn().mockResolvedValue(undefined),
119119
deletePointsByFilePath: vi.fn().mockResolvedValue(undefined),
120120
deletePointsByMultipleFilePaths: vi.fn().mockResolvedValue(undefined),
121+
hasData: vi.fn().mockResolvedValue(false),
121122
}
122123

123124
mockIgnoreInstance = {

src/services/code-index/processors/__tests__/scanner.spec.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ describe("DirectoryScanner", () => {
8888
clearCollection: vi.fn().mockResolvedValue(undefined),
8989
deleteCollection: vi.fn().mockResolvedValue(undefined),
9090
collectionExists: vi.fn().mockResolvedValue(true),
91+
hasData: vi.fn().mockResolvedValue(false),
9192
}
9293
mockCodeParser = {
9394
parseFile: vi.fn().mockResolvedValue([]),

src/services/code-index/vector-store/qdrant-client.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,4 +488,26 @@ export class QdrantVectorStore implements IVectorStore {
488488
const collectionInfo = await this.getCollectionInfo()
489489
return collectionInfo !== null
490490
}
491+
492+
/**
493+
* Checks if the collection has any data (points)
494+
* @returns Promise resolving to boolean indicating if the collection has data
495+
*/
496+
async hasData(): Promise<boolean> {
497+
try {
498+
const collectionInfo = await this.getCollectionInfo()
499+
if (!collectionInfo) {
500+
return false
501+
}
502+
503+
// Check if the collection has any points
504+
// The collection info includes points_count or vectors_count depending on the version
505+
const pointsCount = (collectionInfo as any).points_count || (collectionInfo as any).vectors_count || 0
506+
507+
return pointsCount > 0
508+
} catch (error) {
509+
console.warn(`[QdrantVectorStore] Error checking if collection has data:`, error)
510+
return false
511+
}
512+
}
491513
}

0 commit comments

Comments
 (0)