Skip to content

Commit ea2df0d

Browse files
committed
Implements lazy (on-demand) creation of Qdrant collections.
Key points:
- Defer collection creation until the first write.
- Avoid allocating resources for unused branches.
- Improve error handling on collection access.
- Add promise-based locking to prevent race conditions.
- Optimize vector dimension validation.
- Reduce memory overhead through demand-driven initialization.

Suggested commit title: feat(qdrant): add lazy collection initialization with concurrency safety

Optional extended description: Introduces on-demand collection creation in Qdrant. Collections are created only upon first write, preventing unnecessary allocation for inactive branches. Adds a promise-based lock to avoid concurrent creation races, improves error reporting when accessing missing collections, and streamlines vector dimension validation. This reduces memory usage and improves runtime efficiency.

Smart re-indexing: only do a full scan if the collection doesn't exist or is empty.
1 parent a3b6258 commit ea2df0d

File tree

2 files changed

+180
-54
lines changed

2 files changed

+180
-54
lines changed

src/services/code-index/manager.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ export class CodeIndexManager {
163163
// Re-check Git branch watcher after services were recreated
164164
await this._setupGitHeadWatcher()
165165

166-
167166
// 5. Handle Indexing Start/Restart
168167
// The enhanced vectorStore.initialize() in startIndexing() now handles dimension changes automatically
169168
// by detecting incompatible collections and recreating them, so we rely on that for dimension changes
@@ -430,14 +429,24 @@ export class CodeIndexManager {
430429
if (newBranch === this._lastKnownBranch) return
431430
this._lastKnownBranch = newBranch
432431
await this._recreateServices()
433-
this._orchestrator?.startIndexing()
432+
433+
// Smart re-indexing: only do full scan if collection doesn't exist or is empty
434+
// If collection exists with data, file watcher will handle incremental updates
435+
const collectionExists = await this._serviceFactory?.getVectorStore()?.collectionExists()
436+
if (!collectionExists) {
437+
// New branch or first time indexing this branch - do full scan
438+
this._orchestrator?.startIndexing()
439+
} else {
440+
// Collection exists - just validate/initialize without full scan
441+
// File watcher will detect any file changes from the branch switch
442+
await this._serviceFactory?.getVectorStore()?.initialize()
443+
}
434444
} catch (error) {
435445
console.error("Failed to handle Git branch change:", error)
436446
}
437447
}, 250)
438448
}
439449

440-
441450
/**
442451
* Handle code index settings changes.
443452
* This method should be called when code index settings are updated
@@ -486,7 +495,6 @@ export class CodeIndexManager {
486495
throw error
487496
}
488497
}
489-
490498
}
491499
}
492500
}

src/services/code-index/vector-store/qdrant-client.ts

Lines changed: 168 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ export class QdrantVectorStore implements IVectorStore {
1919
private readonly qdrantUrl: string = "http://localhost:6333"
2020
private readonly workspacePath: string
2121

22+
// Lazy collection creation flag
23+
private _collectionEnsured = false
24+
private _ensurePromise?: Promise<void>
25+
2226
/**
2327
* Creates a new Qdrant vector store
2428
* @param workspacePath Path to the workspace
@@ -166,69 +170,110 @@ export class QdrantVectorStore implements IVectorStore {
166170
try {
167171
const collectionInfo = await this.client.getCollection(this.collectionName)
168172
return collectionInfo
169-
} catch (error: unknown) {
170-
if (error instanceof Error) {
171-
console.warn(
172-
`[QdrantVectorStore] Warning during getCollectionInfo for "${this.collectionName}". Collection may not exist or another error occurred:`,
173-
error.message,
174-
)
173+
} catch (error: any) {
174+
// Check if this is a "not found" error (404) vs a connection error
175+
const status = error?.status || error?.response?.status || error?.statusCode
176+
177+
if (status === 404) {
178+
// Collection doesn't exist - this is expected, return null
179+
return null
175180
}
176-
return null
181+
182+
// For other errors (connection issues, server errors, etc.), log and re-throw
183+
const errorMessage = error?.message || String(error)
184+
console.error(`[QdrantVectorStore] Error accessing collection "${this.collectionName}":`, errorMessage, {
185+
status,
186+
})
187+
188+
// Re-throw connection/server errors instead of silently returning null
189+
throw new Error(`Failed to access Qdrant collection "${this.collectionName}": ${errorMessage}`)
177190
}
178191
}
179192

180193
/**
181-
* Initializes the vector store
194+
* Helper method to create or validate collection with proper dimension checking.
195+
* Extracted to eliminate code duplication between initialize() and _ensureCollectionExists().
182196
* @returns Promise resolving to boolean indicating if a new collection was created
183197
*/
184-
async initialize(): Promise<boolean> {
198+
private async _createOrValidateCollection(): Promise<boolean> {
185199
let created = false
186-
try {
187-
const collectionInfo = await this.getCollectionInfo()
200+
const collectionInfo = await this.getCollectionInfo()
188201

189-
if (collectionInfo === null) {
190-
// Collection info not retrieved (assume not found or inaccessible), create it
191-
await this.client.createCollection(this.collectionName, {
192-
vectors: {
193-
size: this.vectorSize,
194-
distance: this.DISTANCE_METRIC,
195-
on_disk: true,
196-
},
197-
hnsw_config: {
198-
m: 64,
199-
ef_construct: 512,
200-
on_disk: true,
201-
},
202-
})
203-
created = true
202+
if (collectionInfo === null) {
203+
// Collection doesn't exist, create it
204+
console.log(`[QdrantVectorStore] Creating new collection "${this.collectionName}"...`)
205+
await this.client.createCollection(this.collectionName, {
206+
vectors: {
207+
size: this.vectorSize,
208+
distance: this.DISTANCE_METRIC,
209+
on_disk: true,
210+
},
211+
hnsw_config: {
212+
m: 64,
213+
ef_construct: 512,
214+
on_disk: true,
215+
},
216+
})
217+
await this._createPayloadIndexes()
218+
console.log(`[QdrantVectorStore] Successfully created collection "${this.collectionName}"`)
219+
created = true
220+
} else {
221+
// Collection exists, validate vector size
222+
console.log(`[QdrantVectorStore] Collection "${this.collectionName}" already exists, validating...`)
223+
const vectorsConfig = collectionInfo.config?.params?.vectors
224+
let existingVectorSize: number
225+
226+
if (typeof vectorsConfig === "number") {
227+
existingVectorSize = vectorsConfig
228+
} else if (
229+
vectorsConfig &&
230+
typeof vectorsConfig === "object" &&
231+
"size" in vectorsConfig &&
232+
typeof vectorsConfig.size === "number"
233+
) {
234+
existingVectorSize = vectorsConfig.size
204235
} else {
205-
// Collection exists, check vector size
206-
const vectorsConfig = collectionInfo.config?.params?.vectors
207-
let existingVectorSize: number
208-
209-
if (typeof vectorsConfig === "number") {
210-
existingVectorSize = vectorsConfig
211-
} else if (
212-
vectorsConfig &&
213-
typeof vectorsConfig === "object" &&
214-
"size" in vectorsConfig &&
215-
typeof vectorsConfig.size === "number"
216-
) {
217-
existingVectorSize = vectorsConfig.size
218-
} else {
219-
existingVectorSize = 0 // Fallback for unknown configuration
220-
}
236+
existingVectorSize = 0
237+
}
221238

222-
if (existingVectorSize === this.vectorSize) {
223-
created = false // Exists and correct
224-
} else {
225-
// Exists but wrong vector size, recreate with enhanced error handling
226-
created = await this._recreateCollectionWithNewDimension(existingVectorSize)
227-
}
239+
if (existingVectorSize !== this.vectorSize && existingVectorSize !== 0) {
240+
// Dimension mismatch, recreate
241+
console.warn(
242+
`[QdrantVectorStore] Dimension mismatch for "${this.collectionName}": expected ${this.vectorSize}, found ${existingVectorSize}. Recreating...`,
243+
)
244+
created = await this._recreateCollectionWithNewDimension(existingVectorSize)
245+
await this._createPayloadIndexes()
246+
} else {
247+
console.log(`[QdrantVectorStore] Collection "${this.collectionName}" validated successfully`)
228248
}
249+
}
250+
251+
return created
252+
}
253+
254+
/**
255+
* Initializes the vector store by eagerly creating or validating the collection.
256+
*
257+
* This method is called by the orchestrator before full workspace scans to ensure
258+
* the collection exists upfront. For file-watcher-only workflows, collection creation
259+
* is deferred to _ensureCollectionExists() (lazy creation) on first write.
260+
*
261+
* When to use:
262+
* - initialize(): Called before full scans; creates collection eagerly
263+
* - _ensureCollectionExists(): Called on first write; creates collection lazily
264+
*
265+
* @returns Promise resolving to boolean indicating if a new collection was created
266+
* @throws {Error} If collection creation fails or Qdrant connection fails
267+
* @throws {Error} If vector dimension mismatch cannot be resolved
268+
*/
269+
async initialize(): Promise<boolean> {
270+
try {
271+
// Use shared helper to create or validate collection
272+
const created = await this._createOrValidateCollection()
273+
274+
// Mark collection as ensured since we just created/validated it
275+
this._collectionEnsured = true
229276

230-
// Create payload indexes
231-
await this._createPayloadIndexes()
232277
return created
233278
} catch (error: any) {
234279
const errorMessage = error?.message || error
@@ -349,6 +394,63 @@ export class QdrantVectorStore implements IVectorStore {
349394
}
350395
}
351396

397+
/**
398+
* Ensures the collection exists before writing.
399+
* Creates the collection and indexes lazily on first write.
400+
* Uses promise-based locking to prevent race conditions from concurrent calls.
401+
*
402+
* This method is called by upsertPoints() to implement lazy collection creation.
403+
* Unlike initialize(), which eagerly creates collections for full scans, this method
404+
* defers creation until the first write operation, reducing storage overhead for
405+
* branches that are never indexed.
406+
*
407+
* @throws {Error} If collection creation fails or Qdrant connection fails
408+
* @throws {Error} If vector dimension mismatch cannot be resolved
409+
*/
410+
private async _ensureCollectionExists(): Promise<void> {
411+
if (this._collectionEnsured) return
412+
413+
// Prevent concurrent calls - return existing promise if already in progress
414+
if (this._ensurePromise) {
415+
return this._ensurePromise
416+
}
417+
418+
// Create and store the ensure promise
419+
this._ensurePromise = (async () => {
420+
try {
421+
// Use shared helper to create or validate collection
422+
await this._createOrValidateCollection()
423+
424+
// Only set flag on success
425+
this._collectionEnsured = true
426+
} catch (error: any) {
427+
// Reset promise on error so next call can retry
428+
this._ensurePromise = undefined
429+
430+
const errorMessage = error?.message || error
431+
console.error(
432+
`[QdrantVectorStore] Failed to ensure collection "${this.collectionName}" exists:`,
433+
errorMessage,
434+
)
435+
436+
// If this is already a vector dimension mismatch error, re-throw as-is
437+
if (error instanceof Error && error.cause !== undefined) {
438+
throw error
439+
}
440+
441+
// Otherwise, provide a user-friendly error message
442+
throw new Error(
443+
t("embeddings:vectorStore.qdrantConnectionFailed", { qdrantUrl: this.qdrantUrl, errorMessage }),
444+
)
445+
} finally {
446+
// Clear promise after completion (success or failure)
447+
this._ensurePromise = undefined
448+
}
449+
})()
450+
451+
return this._ensurePromise
452+
}
453+
352454
/**
353455
* Upserts points into the vector store
354456
* @param points Array of points to upsert
@@ -361,6 +463,9 @@ export class QdrantVectorStore implements IVectorStore {
361463
}>,
362464
): Promise<void> {
363465
try {
466+
// Ensure collection exists before writing
467+
await this._ensureCollectionExists()
468+
364469
const processedPoints = points.map((point) => {
365470
if (point.payload?.filePath) {
366471
const segments = point.payload.filePath.split(path.sep).filter(Boolean)
@@ -421,6 +526,12 @@ export class QdrantVectorStore implements IVectorStore {
421526
maxResults?: number,
422527
): Promise<VectorStoreSearchResult[]> {
423528
try {
529+
// If collection doesn't exist yet, return empty results
530+
const collectionInfo = await this.getCollectionInfo()
531+
if (collectionInfo === null) {
532+
return []
533+
}
534+
424535
let filter = undefined
425536

426537
if (directoryPrefix) {
@@ -563,6 +674,13 @@ export class QdrantVectorStore implements IVectorStore {
563674
*/
564675
async clearCollection(): Promise<void> {
565676
try {
677+
// Only clear if collection exists
678+
const exists = await this.collectionExists()
679+
if (!exists) {
680+
console.warn(`[QdrantVectorStore] Skipping clear - collection "${this.collectionName}" does not exist`)
681+
return
682+
}
683+
566684
await this.client.delete(this.collectionName, {
567685
filter: {
568686
must: [],

0 commit comments

Comments
 (0)