diff --git a/src/i18n/locales/en/embeddings.json b/src/i18n/locales/en/embeddings.json index 270a8d193b7..a2252b062dc 100644 --- a/src/i18n/locales/en/embeddings.json +++ b/src/i18n/locales/en/embeddings.json @@ -24,7 +24,16 @@ }, "vectorStore": { "qdrantConnectionFailed": "Failed to connect to Qdrant vector database. Please ensure Qdrant is running and accessible at {{qdrantUrl}}. Error: {{errorMessage}}", - "vectorDimensionMismatch": "Failed to update vector index for new model. Please try clearing the index and starting again. Details: {{errorMessage}}" + "vectorDimensionMismatch": "Failed to update vector index for new model. Please try clearing the index and starting again. Details: {{errorMessage}}", + "lancedbNotInstalled": "LanceDB is not installed. Please install it with: npm install @lancedb/lancedb. Error: {{errorMessage}}", + "lancedbInitFailed": "Failed to initialize LanceDB. Error: {{errorMessage}}", + "lancedbConnectionFailed": "Failed to connect to LanceDB. Error: {{errorMessage}}", + "chromadbNotInstalled": "ChromaDB is not installed. Please install it with: npm install chromadb. Error: {{errorMessage}}", + "chromadbInitFailed": "Failed to initialize ChromaDB at {{chromaUrl}}. Error: {{errorMessage}}", + "chromadbConnectionFailed": "Failed to connect to ChromaDB at {{chromaUrl}}. Please ensure ChromaDB is running. Error: {{errorMessage}}", + "sqliteNotInstalled": "SQLite is not installed. Please install it with: npm install better-sqlite3. Error: {{errorMessage}}", + "sqliteVssNotInstalled": "SQLite VSS extension is not installed. Please install it with: npm install sqlite-vss. Error: {{errorMessage}}", + "sqliteInitFailed": "Failed to initialize SQLite vector database. Error: {{errorMessage}}" }, "validation": { "authenticationFailed": "Authentication failed. 
Please check your API key in the settings.", @@ -51,6 +60,7 @@ "vectorDimensionNotDeterminedOpenAiCompatible": "Could not determine vector dimension for model '{{modelId}}' with provider '{{provider}}'. Please ensure the 'Embedding Dimension' is correctly set in the OpenAI-Compatible provider settings.", "vectorDimensionNotDetermined": "Could not determine vector dimension for model '{{modelId}}' with provider '{{provider}}'. Check model profiles or configuration.", "qdrantUrlMissing": "Qdrant URL missing for vector store creation", - "codeIndexingNotConfigured": "Cannot create services: Code indexing is not properly configured" + "codeIndexingNotConfigured": "Cannot create services: Code indexing is not properly configured", + "invalidVectorDBProvider": "Invalid vector database provider: {{provider}}" } } diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts index 1723f1c2a08..79530be304a 100644 --- a/src/services/code-index/config-manager.ts +++ b/src/services/code-index/config-manager.ts @@ -19,8 +19,13 @@ export class CodeIndexConfigManager { private openAiCompatibleOptions?: { baseUrl: string; apiKey: string } private geminiOptions?: { apiKey: string } private mistralOptions?: { apiKey: string } + // Vector database configuration + private vectorDBProvider: "qdrant" | "lancedb" | "chromadb" | "sqlite-vector" = "qdrant" private qdrantUrl?: string = "http://localhost:6333" private qdrantApiKey?: string + private chromadbUrl?: string = "http://localhost:8000" + private chromadbApiKey?: string + // Search configuration private searchMinScore?: number private searchMaxResults?: number @@ -44,7 +49,9 @@ export class CodeIndexConfigManager { // Load configuration from storage const codebaseIndexConfig = this.contextProxy?.getGlobalState("codebaseIndexConfig") ?? 
{ codebaseIndexEnabled: true, + codebaseIndexVectorDBProvider: "qdrant", codebaseIndexQdrantUrl: "http://localhost:6333", + codebaseIndexChromadbUrl: "http://localhost:8000", codebaseIndexEmbedderProvider: "openai", codebaseIndexEmbedderBaseUrl: "", codebaseIndexEmbedderModelId: "", @@ -62,23 +69,32 @@ export class CodeIndexConfigManager { codebaseIndexSearchMaxResults, } = codebaseIndexConfig + // Extract new properties with optional chaining + const codebaseIndexVectorDBProvider = (codebaseIndexConfig as any).codebaseIndexVectorDBProvider + const codebaseIndexChromadbUrl = (codebaseIndexConfig as any).codebaseIndexChromadbUrl + const openAiKey = this.contextProxy?.getSecret("codeIndexOpenAiKey") ?? "" const qdrantApiKey = this.contextProxy?.getSecret("codeIndexQdrantApiKey") ?? "" + // ChromaDB API key is not in the secret keys type yet, so we'll handle it differently + const chromadbApiKey = "" // Fix: Read OpenAI Compatible settings from the correct location within codebaseIndexConfig - const openAiCompatibleBaseUrl = codebaseIndexConfig.codebaseIndexOpenAiCompatibleBaseUrl ?? "" + const openAiCompatibleBaseUrl = (codebaseIndexConfig as any).codebaseIndexOpenAiCompatibleBaseUrl ?? "" const openAiCompatibleApiKey = this.contextProxy?.getSecret("codebaseIndexOpenAiCompatibleApiKey") ?? "" const geminiApiKey = this.contextProxy?.getSecret("codebaseIndexGeminiApiKey") ?? "" const mistralApiKey = this.contextProxy?.getSecret("codebaseIndexMistralApiKey") ?? "" // Update instance variables with configuration this.codebaseIndexEnabled = codebaseIndexEnabled ?? true + this.vectorDBProvider = codebaseIndexVectorDBProvider ?? "qdrant" this.qdrantUrl = codebaseIndexQdrantUrl this.qdrantApiKey = qdrantApiKey ?? "" + this.chromadbUrl = codebaseIndexChromadbUrl ?? "http://localhost:8000" + this.chromadbApiKey = chromadbApiKey ?? 
"" this.searchMinScore = codebaseIndexSearchMinScore this.searchMaxResults = codebaseIndexSearchMaxResults // Validate and set model dimension - const rawDimension = codebaseIndexConfig.codebaseIndexEmbedderModelDimension + const rawDimension = (codebaseIndexConfig as any).codebaseIndexEmbedderModelDimension if (rawDimension !== undefined && rawDimension !== null) { const dimension = Number(rawDimension) if (!isNaN(dimension) && dimension > 0) { @@ -141,8 +157,11 @@ export class CodeIndexConfigManager { openAiCompatibleOptions?: { baseUrl: string; apiKey: string } geminiOptions?: { apiKey: string } mistralOptions?: { apiKey: string } + vectorDBProvider?: "qdrant" | "lancedb" | "chromadb" | "sqlite-vector" qdrantUrl?: string qdrantApiKey?: string + chromadbUrl?: string + chromadbApiKey?: string searchMinScore?: number } requiresRestart: boolean @@ -160,8 +179,11 @@ export class CodeIndexConfigManager { openAiCompatibleApiKey: this.openAiCompatibleOptions?.apiKey ?? "", geminiApiKey: this.geminiOptions?.apiKey ?? "", mistralApiKey: this.mistralOptions?.apiKey ?? "", + vectorDBProvider: this.vectorDBProvider, qdrantUrl: this.qdrantUrl ?? "", qdrantApiKey: this.qdrantApiKey ?? "", + chromadbUrl: this.chromadbUrl ?? "", + chromadbApiKey: this.chromadbApiKey ?? "", } // Refresh secrets from VSCode storage to ensure we have the latest values @@ -184,8 +206,11 @@ export class CodeIndexConfigManager { openAiCompatibleOptions: this.openAiCompatibleOptions, geminiOptions: this.geminiOptions, mistralOptions: this.mistralOptions, + vectorDBProvider: this.vectorDBProvider, qdrantUrl: this.qdrantUrl, qdrantApiKey: this.qdrantApiKey, + chromadbUrl: this.chromadbUrl, + chromadbApiKey: this.chromadbApiKey, searchMinScore: this.currentSearchMinScore, }, requiresRestart, @@ -193,36 +218,52 @@ export class CodeIndexConfigManager { } /** - * Checks if the service is properly configured based on the embedder type. 
+ * Checks if the service is properly configured based on the embedder type and vector DB provider. */ public isConfigured(): boolean { + // First check embedder configuration + let embedderConfigured = false + if (this.embedderProvider === "openai") { const openAiKey = this.openAiOptions?.openAiNativeApiKey - const qdrantUrl = this.qdrantUrl - return !!(openAiKey && qdrantUrl) + embedderConfigured = !!openAiKey } else if (this.embedderProvider === "ollama") { // Ollama model ID has a default, so only base URL is strictly required for config const ollamaBaseUrl = this.ollamaOptions?.ollamaBaseUrl - const qdrantUrl = this.qdrantUrl - return !!(ollamaBaseUrl && qdrantUrl) + embedderConfigured = !!ollamaBaseUrl } else if (this.embedderProvider === "openai-compatible") { const baseUrl = this.openAiCompatibleOptions?.baseUrl const apiKey = this.openAiCompatibleOptions?.apiKey - const qdrantUrl = this.qdrantUrl - const isConfigured = !!(baseUrl && apiKey && qdrantUrl) - return isConfigured + embedderConfigured = !!(baseUrl && apiKey) } else if (this.embedderProvider === "gemini") { const apiKey = this.geminiOptions?.apiKey - const qdrantUrl = this.qdrantUrl - const isConfigured = !!(apiKey && qdrantUrl) - return isConfigured + embedderConfigured = !!apiKey } else if (this.embedderProvider === "mistral") { const apiKey = this.mistralOptions?.apiKey - const qdrantUrl = this.qdrantUrl - const isConfigured = !!(apiKey && qdrantUrl) - return isConfigured + embedderConfigured = !!apiKey + } + + // Then check vector database configuration + let vectorDBConfigured = false + + switch (this.vectorDBProvider) { + case "qdrant": + vectorDBConfigured = !!this.qdrantUrl + break + case "chromadb": + vectorDBConfigured = !!this.chromadbUrl + break + case "lancedb": + case "sqlite-vector": + // These are embedded databases, no URL needed + vectorDBConfigured = true + break + default: + // Default to qdrant for backward compatibility + vectorDBConfigured = !!this.qdrantUrl } - return 
false // Should not happen if embedderProvider is always set correctly + + return embedderConfigured && vectorDBConfigured } /** @@ -255,8 +296,11 @@ export class CodeIndexConfigManager { const prevModelDimension = prev?.modelDimension const prevGeminiApiKey = prev?.geminiApiKey ?? "" const prevMistralApiKey = prev?.mistralApiKey ?? "" + const prevVectorDBProvider = prev?.vectorDBProvider ?? "qdrant" const prevQdrantUrl = prev?.qdrantUrl ?? "" const prevQdrantApiKey = prev?.qdrantApiKey ?? "" + const prevChromadbUrl = prev?.chromadbUrl ?? "" + const prevChromadbApiKey = prev?.chromadbApiKey ?? "" // 1. Transition from disabled/unconfigured to enabled/configured if ((!prevEnabled || !prevConfigured) && this.codebaseIndexEnabled && nowConfigured) { @@ -279,12 +323,7 @@ export class CodeIndexConfigManager { return false } - // Provider change - if (prevProvider !== this.embedderProvider) { - return true - } - - // Authentication changes (API keys) + // Get current values const currentOpenAiKey = this.openAiOptions?.openAiNativeApiKey ?? "" const currentOllamaBaseUrl = this.ollamaOptions?.ollamaBaseUrl ?? "" const currentOpenAiCompatibleBaseUrl = this.openAiCompatibleOptions?.baseUrl ?? "" @@ -292,8 +331,20 @@ export class CodeIndexConfigManager { const currentModelDimension = this.modelDimension const currentGeminiApiKey = this.geminiOptions?.apiKey ?? "" const currentMistralApiKey = this.mistralOptions?.apiKey ?? "" + const currentVectorDBProvider = this.vectorDBProvider ?? "qdrant" const currentQdrantUrl = this.qdrantUrl ?? "" const currentQdrantApiKey = this.qdrantApiKey ?? "" + const currentChromadbUrl = this.chromadbUrl ?? "" + const currentChromadbApiKey = this.chromadbApiKey ?? 
"" + + // Provider change (embedder or vector DB) + if (prevProvider !== this.embedderProvider) { + return true + } + + if (prevVectorDBProvider !== currentVectorDBProvider) { + return true + } if (prevOpenAiKey !== currentOpenAiKey) { return true @@ -323,8 +374,17 @@ export class CodeIndexConfigManager { return true } - if (prevQdrantUrl !== currentQdrantUrl || prevQdrantApiKey !== currentQdrantApiKey) { - return true + // Vector database connection changes + if (prevVectorDBProvider === "qdrant" && currentVectorDBProvider === "qdrant") { + if (prevQdrantUrl !== currentQdrantUrl || prevQdrantApiKey !== currentQdrantApiKey) { + return true + } + } + + if (prevVectorDBProvider === "chromadb" && currentVectorDBProvider === "chromadb") { + if (prevChromadbUrl !== currentChromadbUrl || prevChromadbApiKey !== currentChromadbApiKey) { + return true + } } // Vector dimension changes (still important for compatibility) @@ -375,8 +435,11 @@ export class CodeIndexConfigManager { openAiCompatibleOptions: this.openAiCompatibleOptions, geminiOptions: this.geminiOptions, mistralOptions: this.mistralOptions, + vectorDBProvider: this.vectorDBProvider, qdrantUrl: this.qdrantUrl, qdrantApiKey: this.qdrantApiKey, + chromadbUrl: this.chromadbUrl, + chromadbApiKey: this.chromadbApiKey, searchMinScore: this.currentSearchMinScore, searchMaxResults: this.currentSearchMaxResults, } diff --git a/src/services/code-index/interfaces/config.ts b/src/services/code-index/interfaces/config.ts index 9098a60091c..8945e6c2ee0 100644 --- a/src/services/code-index/interfaces/config.ts +++ b/src/services/code-index/interfaces/config.ts @@ -14,8 +14,13 @@ export interface CodeIndexConfig { openAiCompatibleOptions?: { baseUrl: string; apiKey: string } geminiOptions?: { apiKey: string } mistralOptions?: { apiKey: string } + // Vector database configuration + vectorDBProvider?: "qdrant" | "lancedb" | "chromadb" | "sqlite-vector" qdrantUrl?: string qdrantApiKey?: string + chromadbUrl?: string + 
chromadbApiKey?: string + // Search configuration searchMinScore?: number searchMaxResults?: number } @@ -35,6 +40,10 @@ export type PreviousConfigSnapshot = { openAiCompatibleApiKey?: string geminiApiKey?: string mistralApiKey?: string + // Vector database configuration + vectorDBProvider?: "qdrant" | "lancedb" | "chromadb" | "sqlite-vector" qdrantUrl?: string qdrantApiKey?: string + chromadbUrl?: string + chromadbApiKey?: string } diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts index 68b0f5c0bc6..9dd2ca261b1 100644 --- a/src/services/code-index/service-factory.ts +++ b/src/services/code-index/service-factory.ts @@ -6,6 +6,7 @@ import { GeminiEmbedder } from "./embedders/gemini" import { MistralEmbedder } from "./embedders/mistral" import { EmbedderProvider, getDefaultModelId, getModelDimension } from "../../shared/embeddingModels" import { QdrantVectorStore } from "./vector-store/qdrant-client" +import { QdrantAdapter, LanceDBAdapter, ChromaDBAdapter, SQLiteVectorAdapter } from "./vector-store/adapters" import { codeParser, DirectoryScanner, FileWatcher } from "./processors" import { ICodeParser, IEmbedder, IFileWatcher, IVectorStore } from "./interfaces" import { CodeIndexConfigManager } from "./config-manager" @@ -15,6 +16,8 @@ import { t } from "../../i18n" import { TelemetryService } from "@roo-code/telemetry" import { TelemetryEventName } from "@roo-code/types" +export type VectorDBProvider = "qdrant" | "lancedb" | "chromadb" | "sqlite-vector" + /** * Factory class responsible for creating and configuring code indexing service dependencies. 
*/ @@ -132,12 +135,45 @@ export class CodeIndexServiceFactory { } } - if (!config.qdrantUrl) { - throw new Error(t("embeddings:serviceFactory.qdrantUrlMissing")) + // Get vector database provider from config (default to qdrant for backward compatibility) + const vectorDBProvider = (config.vectorDBProvider as VectorDBProvider) || "qdrant" + + // Create appropriate vector store based on provider + switch (vectorDBProvider) { + case "qdrant": + if (!config.qdrantUrl) { + throw new Error(t("embeddings:serviceFactory.qdrantUrlMissing")) + } + return new QdrantAdapter({ + workspacePath: this.workspacePath, + url: config.qdrantUrl, + vectorSize, + apiKey: config.qdrantApiKey, + }) + + case "lancedb": + return new LanceDBAdapter({ + workspacePath: this.workspacePath, + vectorSize, + }) + + case "chromadb": + return new ChromaDBAdapter({ + workspacePath: this.workspacePath, + url: config.chromadbUrl || "http://localhost:8000", + vectorSize, + apiKey: config.chromadbApiKey, + }) + + case "sqlite-vector": + return new SQLiteVectorAdapter({ + workspacePath: this.workspacePath, + vectorSize, + }) + + default: + throw new Error(t("embeddings:serviceFactory.invalidVectorDBProvider", { provider: vectorDBProvider })) } - - // Assuming constructor is updated: new QdrantVectorStore(workspacePath, url, vectorSize, apiKey?) 
- return new QdrantVectorStore(this.workspacePath, config.qdrantUrl, vectorSize, config.qdrantApiKey) } /** diff --git a/src/services/code-index/vector-store/adapters/base.ts b/src/services/code-index/vector-store/adapters/base.ts new file mode 100644 index 00000000000..8603a8c015a --- /dev/null +++ b/src/services/code-index/vector-store/adapters/base.ts @@ -0,0 +1,124 @@ +import { IVectorStore, PointStruct, VectorStoreSearchResult } from "../../interfaces/vector-store" +import { createHash } from "crypto" + +/** + * Configuration options for vector database adapters + */ +export interface VectorDBConfig { + workspacePath: string + vectorSize: number + apiKey?: string + url?: string + [key: string]: any // Allow adapter-specific configuration +} + +/** + * Abstract base class for vector database adapters. + * All vector database implementations should extend this class. + */ +export abstract class VectorDBAdapter implements IVectorStore { + protected readonly collectionName: string + protected readonly vectorSize: number + protected readonly workspacePath: string + + constructor(protected readonly config: VectorDBConfig) { + this.workspacePath = config.workspacePath + this.vectorSize = config.vectorSize + + // Generate collection name from workspace path + const hash = createHash("sha256").update(config.workspacePath).digest("hex") + this.collectionName = `ws-${hash.substring(0, 16)}` + } + + /** + * Get the name of the vector database provider + */ + abstract get providerName(): string + + /** + * Check if the adapter requires an external service + */ + abstract get requiresExternalService(): boolean + + /** + * Initializes the vector store + * @returns Promise resolving to boolean indicating if a new collection was created + */ + abstract initialize(): Promise + + /** + * Upserts points into the vector store + * @param points Array of points to upsert + */ + abstract upsertPoints(points: PointStruct[]): Promise + + /** + * Searches for similar vectors + * @param 
queryVector Vector to search for + * @param directoryPrefix Optional directory prefix to filter results + * @param minScore Optional minimum score threshold + * @param maxResults Optional maximum number of results to return + * @returns Promise resolving to search results + */ + abstract search( + queryVector: number[], + directoryPrefix?: string, + minScore?: number, + maxResults?: number, + ): Promise + + /** + * Deletes points by file path + * @param filePath Path of the file to delete points for + */ + abstract deletePointsByFilePath(filePath: string): Promise + + /** + * Deletes points by multiple file paths + * @param filePaths Array of file paths to delete points for + */ + abstract deletePointsByMultipleFilePaths(filePaths: string[]): Promise + + /** + * Clears all points from the collection + */ + abstract clearCollection(): Promise + + /** + * Deletes the entire collection + */ + abstract deleteCollection(): Promise + + /** + * Checks if the collection exists + * @returns Promise resolving to boolean indicating if the collection exists + */ + abstract collectionExists(): Promise + + /** + * Validates the adapter configuration + * @returns Promise resolving to validation result + */ + abstract validateConfiguration(): Promise<{ valid: boolean; error?: string }> + + /** + * Gets adapter-specific configuration requirements + * @returns Configuration requirements for the adapter + */ + abstract getConfigurationRequirements(): { + required: string[] + optional: string[] + defaults: Record + } + + /** + * Helper method to validate payload structure + */ + protected isPayloadValid(payload: Record | null | undefined): boolean { + if (!payload) { + return false + } + const validKeys = ["filePath", "codeChunk", "startLine", "endLine"] + return validKeys.every((key) => key in payload) + } +} diff --git a/src/services/code-index/vector-store/adapters/chromadb.ts b/src/services/code-index/vector-store/adapters/chromadb.ts new file mode 100644 index 
00000000000..8a12ace3951 --- /dev/null +++ b/src/services/code-index/vector-store/adapters/chromadb.ts @@ -0,0 +1,415 @@ +import * as path from "path" +import { VectorDBAdapter, VectorDBConfig } from "./base" +import { PointStruct, VectorStoreSearchResult } from "../../interfaces/vector-store" +import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../../constants" +import { t } from "../../../../i18n" +import { getWorkspacePath } from "../../../../utils/path" + +// Dynamic imports for ChromaDB to handle optional dependency +let ChromaClient: any + +/** + * ChromaDB adapter for vector database operations + * ChromaDB can run as either a client-server or in-memory database + */ +export class ChromaDBAdapter extends VectorDBAdapter { + private client: any + private collection: any + private chromaUrl: string + private initialized: boolean = false + + constructor(config: VectorDBConfig) { + super(config) + // Default to local ChromaDB instance + this.chromaUrl = config.url || "http://localhost:8000" + } + + get providerName(): string { + return "chromadb" + } + + get requiresExternalService(): boolean { + // ChromaDB can run in-memory or as a service + return this.chromaUrl !== "memory" + } + + /** + * Dynamically import ChromaDB modules + */ + private async loadChromaDB() { + if (!ChromaClient) { + try { + // @ts-ignore - Dynamic import for optional dependency + const chromaModule = await import("chromadb") + ChromaClient = chromaModule.ChromaClient + } catch (error) { + throw new Error( + t("embeddings:vectorStore.chromadbNotInstalled", { + errorMessage: error instanceof Error ? 
error.message : String(error), + }), + ) + } + } + } + + async initialize(): Promise { + try { + await this.loadChromaDB() + + // Create ChromaDB client + if (this.chromaUrl === "memory") { + // In-memory mode for testing or lightweight usage + this.client = new ChromaClient() + } else { + // Client-server mode + this.client = new ChromaClient({ + path: this.chromaUrl, + }) + } + + // Check if collection exists + let collectionExists = false + try { + const collections = await this.client.listCollections() + collectionExists = collections.some((col: any) => col.name === this.collectionName) + } catch (error) { + console.warn(`[ChromaDBAdapter] Error listing collections:`, error) + } + + if (!collectionExists) { + // Create new collection + this.collection = await this.client.createCollection({ + name: this.collectionName, + metadata: { + "hnsw:space": "cosine", + vector_size: this.vectorSize, + }, + }) + this.initialized = true + return true // New collection created + } else { + // Get existing collection + this.collection = await this.client.getCollection({ + name: this.collectionName, + }) + + // Verify vector dimension matches + const metadata = this.collection.metadata || {} + const existingVectorSize = metadata.vector_size + + if (existingVectorSize && existingVectorSize !== this.vectorSize) { + // Dimension mismatch - need to recreate collection + console.warn( + `[ChromaDBAdapter] Collection ${this.collectionName} exists with vector size ${existingVectorSize}, but expected ${this.vectorSize}. 
Recreating collection.`, + ) + + // Delete and recreate collection + await this.client.deleteCollection({ name: this.collectionName }) + + this.collection = await this.client.createCollection({ + name: this.collectionName, + metadata: { + "hnsw:space": "cosine", + vector_size: this.vectorSize, + }, + }) + this.initialized = true + return true // Recreated collection + } + + this.initialized = true + return false // Existing collection used + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + console.error(`[ChromaDBAdapter] Failed to initialize ChromaDB:`, errorMessage) + throw new Error(t("embeddings:vectorStore.chromadbInitFailed", { chromaUrl: this.chromaUrl, errorMessage })) + } + } + + async upsertPoints(points: PointStruct[]): Promise { + if (!this.initialized || !this.collection) { + throw new Error("ChromaDB not initialized") + } + + try { + // Transform points to ChromaDB format + const ids: string[] = [] + const embeddings: number[][] = [] + const metadatas: any[] = [] + const documents: string[] = [] + + for (const point of points) { + ids.push(point.id) + embeddings.push(Array.from(point.vector)) + + // Build metadata with path segments + const pathSegments = point.payload?.filePath + ? 
point.payload.filePath + .split(path.sep) + .filter(Boolean) + .reduce((acc: Record, segment: string, index: number) => { + acc[`pathSegment_${index}`] = segment + return acc + }, {}) + : {} + + metadatas.push({ + filePath: point.payload?.filePath || "", + startLine: point.payload?.startLine || 0, + endLine: point.payload?.endLine || 0, + ...pathSegments, + }) + + // Use code chunk as document + documents.push(point.payload?.codeChunk || "") + } + + // Upsert to collection + await this.collection.upsert({ + ids, + embeddings, + metadatas, + documents, + }) + } catch (error) { + console.error("Failed to upsert points:", error) + throw error + } + } + + async search( + queryVector: number[], + directoryPrefix?: string, + minScore?: number, + maxResults?: number, + ): Promise { + if (!this.initialized || !this.collection) { + throw new Error("ChromaDB not initialized") + } + + try { + // Build where clause for filtering + let whereClause: any = undefined + + if (directoryPrefix) { + const segments = directoryPrefix.split(path.sep).filter(Boolean) + + // Build filter for path segments + whereClause = { + $and: segments.map((segment, index) => ({ + [`pathSegment_${index}`]: segment, + })), + } + } + + // Query collection + const results = await this.collection.query({ + queryEmbeddings: [Array.from(queryVector)], + nResults: maxResults ?? 
DEFAULT_MAX_SEARCH_RESULTS, + where: whereClause, + }) + + // Transform results to our format + const searchResults: VectorStoreSearchResult[] = [] + + if (results.ids && results.ids[0]) { + const queryResults = results.ids[0] + const distances = results.distances?.[0] || [] + const metadatas = results.metadatas?.[0] || [] + const documents = results.documents?.[0] || [] + + for (let i = 0; i < queryResults.length; i++) { + // Convert distance to similarity score + // The collection uses "hnsw:space": "cosine", so ChromaDB reports cosine distance (1 - cosine similarity) + // Similarity is therefore recovered directly as 1 - distance (no L2 square-root conversion) + const distance = distances[i] || 0 + const score = 1 - distance + + // Skip results below minimum score + if (score < (minScore ?? DEFAULT_SEARCH_MIN_SCORE)) { + continue + } + + const metadata = metadatas[i] || {} + + // Reconstruct path segments + const pathSegments: Record = {} + for (const key in metadata) { + if (key.startsWith("pathSegment_")) { + const index = key.replace("pathSegment_", "") + pathSegments[index] = metadata[key] + } + } + + const payload = { + filePath: metadata.filePath || "", + codeChunk: documents[i] || "", + startLine: metadata.startLine || 0, + endLine: metadata.endLine || 0, + pathSegments, + } + + if (this.isPayloadValid(payload)) { + searchResults.push({ + id: queryResults[i], + score, + payload, + }) + } + } + } + + return searchResults + } catch (error) { + console.error("Failed to search points:", error) + throw error + } + } + + async deletePointsByFilePath(filePath: string): Promise { + return this.deletePointsByMultipleFilePaths([filePath]) + } + + async deletePointsByMultipleFilePaths(filePaths: string[]): Promise { + if (!this.initialized || !this.collection) { + throw new Error("ChromaDB not initialized") + } + + if (filePaths.length === 0) { + return + } + + try { + const workspaceRoot = getWorkspacePath() + const normalizedPaths = filePaths.map((filePath) => { + const absolutePath = path.resolve(workspaceRoot, filePath) + return 
path.normalize(absolutePath) + }) + + // Delete records matching any of the file paths + await this.collection.delete({ + where: { + $or: normalizedPaths.map((normalizedPath) => ({ + filePath: normalizedPath, + })), + }, + }) + } catch (error) { + console.error("Failed to delete points by file paths:", error) + throw error + } + } + + async deleteCollection(): Promise { + try { + if (this.client && (await this.collectionExists())) { + await this.client.deleteCollection({ name: this.collectionName }) + this.collection = null + this.initialized = false + } + } catch (error) { + console.error(`[ChromaDBAdapter] Failed to delete collection ${this.collectionName}:`, error) + throw error + } + } + + async clearCollection(): Promise { + if (!this.initialized || !this.collection) { + throw new Error("ChromaDB not initialized") + } + + try { + // Get all IDs and delete them + const allData = await this.collection.get() + if (allData.ids && allData.ids.length > 0) { + await this.collection.delete({ + ids: allData.ids, + }) + } + } catch (error) { + console.error("Failed to clear collection:", error) + throw error + } + } + + async collectionExists(): Promise { + try { + if (!this.client) { + await this.loadChromaDB() + + if (this.chromaUrl === "memory") { + this.client = new ChromaClient() + } else { + this.client = new ChromaClient({ + path: this.chromaUrl, + }) + } + } + + const collections = await this.client.listCollections() + return collections.some((col: any) => col.name === this.collectionName) + } catch { + return false + } + } + + async validateConfiguration(): Promise<{ valid: boolean; error?: string }> { + try { + // Try to load ChromaDB + await this.loadChromaDB() + + // Try to connect + let testClient: any + if (this.chromaUrl === "memory") { + testClient = new ChromaClient() + } else { + testClient = new ChromaClient({ + path: this.chromaUrl, + }) + } + + // List collections to verify connection works + await testClient.listCollections() + + return { valid: 
true } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + + if (errorMessage.includes("Cannot find module")) { + return { + valid: false, + error: t("embeddings:vectorStore.chromadbNotInstalled", { errorMessage }), + } + } + + if (errorMessage.includes("ECONNREFUSED") || errorMessage.includes("fetch failed")) { + return { + valid: false, + error: t("embeddings:vectorStore.chromadbConnectionFailed", { + chromaUrl: this.chromaUrl, + errorMessage, + }), + } + } + + return { + valid: false, + error: t("embeddings:vectorStore.chromadbInitFailed", { + chromaUrl: this.chromaUrl, + errorMessage, + }), + } + } + } + + getConfigurationRequirements() { + return { + required: ["vectorSize", "workspacePath"], + optional: ["url", "apiKey"], + defaults: { + url: "http://localhost:8000", + }, + } + } +} diff --git a/src/services/code-index/vector-store/adapters/index.ts b/src/services/code-index/vector-store/adapters/index.ts new file mode 100644 index 00000000000..32f41ec1cc9 --- /dev/null +++ b/src/services/code-index/vector-store/adapters/index.ts @@ -0,0 +1,5 @@ +export * from "./base" +export * from "./qdrant" +export * from "./lancedb" +export * from "./chromadb" +export * from "./sqlite-vector" diff --git a/src/services/code-index/vector-store/adapters/lancedb.ts b/src/services/code-index/vector-store/adapters/lancedb.ts new file mode 100644 index 00000000000..59a300bec45 --- /dev/null +++ b/src/services/code-index/vector-store/adapters/lancedb.ts @@ -0,0 +1,355 @@ +import * as path from "path" +import { VectorDBAdapter, VectorDBConfig } from "./base" +import { PointStruct, VectorStoreSearchResult } from "../../interfaces/vector-store" +import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../../constants" +import { t } from "../../../../i18n" +import { getWorkspacePath } from "../../../../utils/path" + +// Dynamic imports for LanceDB to handle optional dependency +let lancedb: any +let Table: any + +/** + * 
LanceDB adapter for vector database operations + * LanceDB is an embedded vector database that doesn't require a separate service + */ +export class LanceDBAdapter extends VectorDBAdapter { + private db: any + private table: any + private dbPath: string + private initialized: boolean = false + + constructor(config: VectorDBConfig) { + super(config) + // Store data in a .lancedb directory within the workspace + this.dbPath = path.join(config.workspacePath, ".lancedb") + } + + get providerName(): string { + return "lancedb" + } + + get requiresExternalService(): boolean { + return false // LanceDB is embedded + } + + /** + * Dynamically import LanceDB modules + */ + private async loadLanceDB() { + if (!lancedb) { + try { + // @ts-ignore - Dynamic import for optional dependency + const lancedbModule = await import("@lancedb/lancedb") + lancedb = lancedbModule.connect + Table = lancedbModule.Table + } catch (error) { + throw new Error( + t("embeddings:vectorStore.lancedbNotInstalled", { + errorMessage: error instanceof Error ? 
error.message : String(error), + }), + ) + } + } + } + + async initialize(): Promise { + try { + await this.loadLanceDB() + + // Connect to LanceDB (creates directory if it doesn't exist) + this.db = await lancedb(this.dbPath) + + // Check if table exists + const tables = await this.db.tableNames() + const tableExists = tables.includes(this.collectionName) + + if (!tableExists) { + // Create new table with schema + const schema = { + id: "string", + vector: `fixed_size_list<${this.vectorSize}>[float32]`, + filePath: "string", + codeChunk: "string", + startLine: "int32", + endLine: "int32", + pathSegments: "string", // JSON string for path segments + } + + // Create empty table with schema + await this.db.createEmptyTable(this.collectionName, schema) + this.table = await this.db.openTable(this.collectionName) + this.initialized = true + return true // New collection created + } else { + // Open existing table + this.table = await this.db.openTable(this.collectionName) + + // Verify vector dimension matches + const tableSchema = await this.table.schema + const vectorField = tableSchema.fields.find((f: any) => f.name === "vector") + + if (vectorField) { + // Extract dimension from field type + const dimensionMatch = vectorField.dataType.toString().match(/fixed_size_list<(\d+)>/) + const existingDimension = dimensionMatch ? parseInt(dimensionMatch[1]) : 0 + + if (existingDimension !== this.vectorSize) { + // Dimension mismatch - need to recreate table + console.warn( + `[LanceDBAdapter] Table ${this.collectionName} exists with vector size ${existingDimension}, but expected ${this.vectorSize}. 
Recreating table.`, + ) + + // Drop and recreate table + await this.db.dropTable(this.collectionName) + + const schema = { + id: "string", + vector: `fixed_size_list<${this.vectorSize}>[float32]`, + filePath: "string", + codeChunk: "string", + startLine: "int32", + endLine: "int32", + pathSegments: "string", + } + + await this.db.createEmptyTable(this.collectionName, schema) + this.table = await this.db.openTable(this.collectionName) + this.initialized = true + return true // Recreated collection + } + } + + this.initialized = true + return false // Existing collection used + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + console.error(`[LanceDBAdapter] Failed to initialize LanceDB:`, errorMessage) + throw new Error(t("embeddings:vectorStore.lancedbInitFailed", { errorMessage })) + } + } + + async upsertPoints(points: PointStruct[]): Promise { + if (!this.initialized || !this.table) { + throw new Error("LanceDB not initialized") + } + + try { + // Transform points to LanceDB format + const records = points.map((point) => { + const pathSegments = point.payload?.filePath + ? 
point.payload.filePath + .split(path.sep) + .filter(Boolean) + .reduce((acc: Record, segment: string, index: number) => { + acc[index.toString()] = segment + return acc + }, {}) + : {} + + return { + id: point.id, + vector: Array.from(point.vector), // Ensure it's a regular array + filePath: point.payload?.filePath || "", + codeChunk: point.payload?.codeChunk || "", + startLine: point.payload?.startLine || 0, + endLine: point.payload?.endLine || 0, + pathSegments: JSON.stringify(pathSegments), + } + }) + + // Add records to table + await this.table.add(records) + } catch (error) { + console.error("Failed to upsert points:", error) + throw error + } + } + + async search( + queryVector: number[], + directoryPrefix?: string, + minScore?: number, + maxResults?: number, + ): Promise { + if (!this.initialized || !this.table) { + throw new Error("LanceDB not initialized") + } + + try { + // Build query + let query = this.table.vectorSearch(Array.from(queryVector)).limit(maxResults ?? DEFAULT_MAX_SEARCH_RESULTS) + + // LanceDB uses distance, not similarity score + // For cosine distance: 0 = identical, 2 = opposite + // Convert minScore (0-1 similarity) to maxDistance (0-2 distance) + const maxDistance = minScore !== undefined ? 
2 * (1 - minScore) : 2 * (1 - DEFAULT_SEARCH_MIN_SCORE) + query = query.where(`distance <= ${maxDistance}`) + + // Apply directory filter if provided + if (directoryPrefix) { + const segments = directoryPrefix.split(path.sep).filter(Boolean) + + // Build filter for path segments + // LanceDB doesn't support JSON queries directly, so we'll filter in post-processing + // For now, use a simple filePath prefix filter + const normalizedPrefix = segments.join(path.sep) + query = query.where(`filePath LIKE '${normalizedPrefix}%'`) + } + + // Execute search + const results = await query.execute() + + // Transform results to our format + return results + .map((result: any) => { + // Convert distance to similarity score + const score = 1 - result._distance / 2 + + // Parse path segments + let pathSegments = {} + try { + pathSegments = JSON.parse(result.pathSegments || "{}") + } catch { + // Ignore parse errors + } + + return { + id: result.id, + score: score, + payload: { + filePath: result.filePath, + codeChunk: result.codeChunk, + startLine: result.startLine, + endLine: result.endLine, + pathSegments, + }, + } + }) + .filter((result: VectorStoreSearchResult) => { + // Additional filtering for directory prefix if needed + if (directoryPrefix) { + const segments = directoryPrefix.split(path.sep).filter(Boolean) + const resultSegments = result.payload?.pathSegments || {} + + // Check if all prefix segments match + return segments.every((segment, index) => resultSegments[index.toString()] === segment) + } + return true + }) + .filter((result: VectorStoreSearchResult) => this.isPayloadValid(result.payload)) + } catch (error) { + console.error("Failed to search points:", error) + throw error + } + } + + async deletePointsByFilePath(filePath: string): Promise { + return this.deletePointsByMultipleFilePaths([filePath]) + } + + async deletePointsByMultipleFilePaths(filePaths: string[]): Promise { + if (!this.initialized || !this.table) { + throw new Error("LanceDB not initialized") 
+ } + + if (filePaths.length === 0) { + return + } + + try { + const workspaceRoot = getWorkspacePath() + const normalizedPaths = filePaths.map((filePath) => { + const absolutePath = path.resolve(workspaceRoot, filePath) + return path.normalize(absolutePath) + }) + + // Delete records matching any of the file paths + for (const normalizedPath of normalizedPaths) { + await this.table.delete(`filePath = '${normalizedPath}'`) + } + } catch (error) { + console.error("Failed to delete points by file paths:", error) + throw error + } + } + + async deleteCollection(): Promise { + try { + if (this.db && (await this.collectionExists())) { + await this.db.dropTable(this.collectionName) + this.table = null + this.initialized = false + } + } catch (error) { + console.error(`[LanceDBAdapter] Failed to delete collection ${this.collectionName}:`, error) + throw error + } + } + + async clearCollection(): Promise { + if (!this.initialized || !this.table) { + throw new Error("LanceDB not initialized") + } + + try { + // Delete all records + await this.table.delete("1 = 1") // Delete where true (all records) + } catch (error) { + console.error("Failed to clear collection:", error) + throw error + } + } + + async collectionExists(): Promise { + try { + if (!this.db) { + await this.loadLanceDB() + this.db = await lancedb(this.dbPath) + } + + const tables = await this.db.tableNames() + return tables.includes(this.collectionName) + } catch { + return false + } + } + + async validateConfiguration(): Promise<{ valid: boolean; error?: string }> { + try { + // Try to load LanceDB + await this.loadLanceDB() + + // Try to connect + const testDb = await lancedb(this.dbPath) + + // List tables to verify connection works + await testDb.tableNames() + + return { valid: true } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error) + + if (errorMessage.includes("Cannot find module")) { + return { + valid: false, + error: t("embeddings:vectorStore.lancedbNotInstalled", { errorMessage }), + } + } + + return { + valid: false, + error: t("embeddings:vectorStore.lancedbConnectionFailed", { errorMessage }), + } + } + } + + getConfigurationRequirements() { + return { + required: ["vectorSize", "workspacePath"], + optional: [], + defaults: {}, + } + } +} diff --git a/src/services/code-index/vector-store/adapters/qdrant.ts b/src/services/code-index/vector-store/adapters/qdrant.ts new file mode 100644 index 00000000000..97ef0c2c70f --- /dev/null +++ b/src/services/code-index/vector-store/adapters/qdrant.ts @@ -0,0 +1,455 @@ +import { QdrantClient, Schemas } from "@qdrant/js-client-rest" +import * as path from "path" +import { VectorDBAdapter, VectorDBConfig } from "./base" +import { PointStruct, VectorStoreSearchResult, Payload } from "../../interfaces/vector-store" +import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../../constants" +import { t } from "../../../../i18n" +import { getWorkspacePath } from "../../../../utils/path" + +/** + * Qdrant adapter for vector database operations + */ +export class QdrantAdapter extends VectorDBAdapter { + private client: QdrantClient + private readonly DISTANCE_METRIC = "Cosine" + private readonly qdrantUrl: string + + constructor(config: VectorDBConfig) { + super(config) + + // Parse the URL to determine the appropriate QdrantClient configuration + const parsedUrl = this.parseQdrantUrl(config.url) + this.qdrantUrl = parsedUrl + + try { + const urlObj = new URL(parsedUrl) + + // Always use host-based configuration with explicit ports to avoid QdrantClient defaults + let port: number + let useHttps: boolean + + if (urlObj.port) { + // Explicit port specified - use it and determine protocol + port = Number(urlObj.port) + useHttps = urlObj.protocol === "https:" + } else { + // No explicit port - use protocol defaults 
+ if (urlObj.protocol === "https:") { + port = 443 + useHttps = true + } else { + // http: or other protocols default to port 80 + port = 80 + useHttps = false + } + } + + this.client = new QdrantClient({ + host: urlObj.hostname, + https: useHttps, + port: port, + prefix: urlObj.pathname === "/" ? undefined : urlObj.pathname.replace(/\/+$/, ""), + apiKey: config.apiKey, + headers: { + "User-Agent": "Roo-Code", + }, + }) + } catch (urlError) { + // If URL parsing fails, fall back to URL-based config + this.client = new QdrantClient({ + url: parsedUrl, + apiKey: config.apiKey, + headers: { + "User-Agent": "Roo-Code", + }, + }) + } + } + + get providerName(): string { + return "qdrant" + } + + get requiresExternalService(): boolean { + return true + } + + /** + * Parses and normalizes Qdrant server URLs to handle various input formats + */ + private parseQdrantUrl(url: string | undefined): string { + // Handle undefined/null/empty cases + if (!url || url.trim() === "") { + return "http://localhost:6333" + } + + const trimmedUrl = url.trim() + + // Check if it starts with a protocol + if (!trimmedUrl.startsWith("http://") && !trimmedUrl.startsWith("https://") && !trimmedUrl.includes("://")) { + // No protocol - treat as hostname + return this.parseHostname(trimmedUrl) + } + + try { + // Attempt to parse as complete URL - return as-is, let constructor handle ports + const parsedUrl = new URL(trimmedUrl) + return trimmedUrl + } catch { + // Failed to parse as URL - treat as hostname + return this.parseHostname(trimmedUrl) + } + } + + /** + * Handles hostname-only inputs + */ + private parseHostname(hostname: string): string { + if (hostname.includes(":")) { + // Has port - add http:// prefix if missing + return hostname.startsWith("http") ? 
hostname : `http://${hostname}` + } else { + // No port - add http:// prefix without port (let constructor handle port assignment) + return `http://${hostname}` + } + } + + private async getCollectionInfo(): Promise { + try { + const collectionInfo = await this.client.getCollection(this.collectionName) + return collectionInfo + } catch (error: unknown) { + if (error instanceof Error) { + console.warn( + `[QdrantAdapter] Warning during getCollectionInfo for "${this.collectionName}". Collection may not exist or another error occurred:`, + error.message, + ) + } + return null + } + } + + async initialize(): Promise { + let created = false + try { + const collectionInfo = await this.getCollectionInfo() + + if (collectionInfo === null) { + // Collection info not retrieved (assume not found or inaccessible), create it + await this.client.createCollection(this.collectionName, { + vectors: { + size: this.vectorSize, + distance: this.DISTANCE_METRIC, + }, + }) + created = true + } else { + // Collection exists, check vector size + const vectorsConfig = collectionInfo.config?.params?.vectors + let existingVectorSize: number + + if (typeof vectorsConfig === "number") { + existingVectorSize = vectorsConfig + } else if ( + vectorsConfig && + typeof vectorsConfig === "object" && + "size" in vectorsConfig && + typeof vectorsConfig.size === "number" + ) { + existingVectorSize = vectorsConfig.size + } else { + existingVectorSize = 0 // Fallback for unknown configuration + } + + if (existingVectorSize === this.vectorSize) { + created = false // Exists and correct + } else { + // Exists but wrong vector size, recreate with enhanced error handling + created = await this._recreateCollectionWithNewDimension(existingVectorSize) + } + } + + // Create payload indexes + await this._createPayloadIndexes() + return created + } catch (error: any) { + const errorMessage = error?.message || error + console.error( + `[QdrantAdapter] Failed to initialize Qdrant collection 
"${this.collectionName}":`, + errorMessage, + ) + + // If this is already a vector dimension mismatch error (identified by cause), re-throw it as-is + if (error instanceof Error && error.cause !== undefined) { + throw error + } + + // Otherwise, provide a more user-friendly error message that includes the original error + throw new Error( + t("embeddings:vectorStore.qdrantConnectionFailed", { qdrantUrl: this.qdrantUrl, errorMessage }), + ) + } + } + + /** + * Recreates the collection with a new vector dimension, handling failures gracefully. + */ + private async _recreateCollectionWithNewDimension(existingVectorSize: number): Promise { + console.warn( + `[QdrantAdapter] Collection ${this.collectionName} exists with vector size ${existingVectorSize}, but expected ${this.vectorSize}. Recreating collection.`, + ) + + let deletionSucceeded = false + let recreationAttempted = false + + try { + // Step 1: Attempt to delete the existing collection + console.log(`[QdrantAdapter] Deleting existing collection ${this.collectionName}...`) + await this.client.deleteCollection(this.collectionName) + deletionSucceeded = true + console.log(`[QdrantAdapter] Successfully deleted collection ${this.collectionName}`) + + // Step 2: Wait a brief moment to ensure deletion is processed + await new Promise((resolve) => setTimeout(resolve, 100)) + + // Step 3: Verify the collection is actually deleted + const verificationInfo = await this.getCollectionInfo() + if (verificationInfo !== null) { + throw new Error("Collection still exists after deletion attempt") + } + + // Step 4: Create the new collection with correct dimensions + console.log( + `[QdrantAdapter] Creating new collection ${this.collectionName} with vector size ${this.vectorSize}...`, + ) + recreationAttempted = true + await this.client.createCollection(this.collectionName, { + vectors: { + size: this.vectorSize, + distance: this.DISTANCE_METRIC, + }, + }) + console.log(`[QdrantAdapter] Successfully created new collection 
${this.collectionName}`) + return true + } catch (recreationError) { + const errorMessage = recreationError instanceof Error ? recreationError.message : String(recreationError) + + // Provide detailed error context based on what stage failed + let contextualErrorMessage: string + if (!deletionSucceeded) { + contextualErrorMessage = `Failed to delete existing collection with vector size ${existingVectorSize}. ${errorMessage}` + } else if (!recreationAttempted) { + contextualErrorMessage = `Deleted existing collection but failed verification step. ${errorMessage}` + } else { + contextualErrorMessage = `Deleted existing collection but failed to create new collection with vector size ${this.vectorSize}. ${errorMessage}` + } + + console.error( + `[QdrantAdapter] CRITICAL: Failed to recreate collection ${this.collectionName} for dimension change (${existingVectorSize} -> ${this.vectorSize}). ${contextualErrorMessage}`, + ) + + // Create a comprehensive error message for the user + const dimensionMismatchError = new Error( + t("embeddings:vectorStore.vectorDimensionMismatch", { + errorMessage: contextualErrorMessage, + }), + ) + + // Preserve the original error context + dimensionMismatchError.cause = recreationError + throw dimensionMismatchError + } + } + + /** + * Creates payload indexes for the collection, handling errors gracefully. + */ + private async _createPayloadIndexes(): Promise { + for (let i = 0; i <= 4; i++) { + try { + await this.client.createPayloadIndex(this.collectionName, { + field_name: `pathSegments.${i}`, + field_schema: "keyword", + }) + } catch (indexError: any) { + const errorMessage = (indexError?.message || "").toLowerCase() + if (!errorMessage.includes("already exists")) { + console.warn( + `[QdrantAdapter] Could not create payload index for pathSegments.${i} on ${this.collectionName}. 
Details:`, + indexError?.message || indexError, + ) + } + } + } + } + + async upsertPoints(points: PointStruct[]): Promise { + try { + const processedPoints = points.map((point) => { + if (point.payload?.filePath) { + const segments = point.payload.filePath.split(path.sep).filter(Boolean) + const pathSegments = segments.reduce( + (acc: Record, segment: string, index: number) => { + acc[index.toString()] = segment + return acc + }, + {}, + ) + return { + ...point, + payload: { + ...point.payload, + pathSegments, + }, + } + } + return point + }) + + await this.client.upsert(this.collectionName, { + points: processedPoints, + wait: true, + }) + } catch (error) { + console.error("Failed to upsert points:", error) + throw error + } + } + + async search( + queryVector: number[], + directoryPrefix?: string, + minScore?: number, + maxResults?: number, + ): Promise { + try { + let filter = undefined + + if (directoryPrefix) { + const segments = directoryPrefix.split(path.sep).filter(Boolean) + + filter = { + must: segments.map((segment, index) => ({ + key: `pathSegments.${index}`, + match: { value: segment }, + })), + } + } + + const searchRequest = { + query: queryVector, + filter, + score_threshold: minScore ?? DEFAULT_SEARCH_MIN_SCORE, + limit: maxResults ?? 
DEFAULT_MAX_SEARCH_RESULTS, + params: { + hnsw_ef: 128, + exact: false, + }, + with_payload: { + include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"], + }, + } + + const operationResult = await this.client.query(this.collectionName, searchRequest) + const filteredPoints = operationResult.points.filter((p) => this.isPayloadValid(p.payload)) + + return filteredPoints as VectorStoreSearchResult[] + } catch (error) { + console.error("Failed to search points:", error) + throw error + } + } + + async deletePointsByFilePath(filePath: string): Promise { + return this.deletePointsByMultipleFilePaths([filePath]) + } + + async deletePointsByMultipleFilePaths(filePaths: string[]): Promise { + if (filePaths.length === 0) { + return + } + + try { + const workspaceRoot = getWorkspacePath() + const normalizedPaths = filePaths.map((filePath) => { + const absolutePath = path.resolve(workspaceRoot, filePath) + return path.normalize(absolutePath) + }) + + const filter = { + should: normalizedPaths.map((normalizedPath) => ({ + key: "filePath", + match: { + value: normalizedPath, + }, + })), + } + + await this.client.delete(this.collectionName, { + filter, + wait: true, + }) + } catch (error) { + console.error("Failed to delete points by file paths:", error) + throw error + } + } + + async deleteCollection(): Promise { + try { + // Check if collection exists before attempting deletion to avoid errors + if (await this.collectionExists()) { + await this.client.deleteCollection(this.collectionName) + } + } catch (error) { + console.error(`[QdrantAdapter] Failed to delete collection ${this.collectionName}:`, error) + throw error // Re-throw to allow calling code to handle it + } + } + + async clearCollection(): Promise { + try { + await this.client.delete(this.collectionName, { + filter: { + must: [], + }, + wait: true, + }) + } catch (error) { + console.error("Failed to clear collection:", error) + throw error + } + } + + async collectionExists(): Promise { + const 
collectionInfo = await this.getCollectionInfo() + return collectionInfo !== null + } + + async validateConfiguration(): Promise<{ valid: boolean; error?: string }> { + try { + // Try to connect to Qdrant by checking collections + await this.client.getCollections() + return { valid: true } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + return { + valid: false, + error: t("embeddings:vectorStore.qdrantConnectionFailed", { + qdrantUrl: this.qdrantUrl, + errorMessage, + }), + } + } + } + + getConfigurationRequirements() { + return { + required: ["url", "vectorSize", "workspacePath"], + optional: ["apiKey"], + defaults: { + url: "http://localhost:6333", + }, + } + } +} diff --git a/src/services/code-index/vector-store/adapters/sqlite-vector.ts b/src/services/code-index/vector-store/adapters/sqlite-vector.ts new file mode 100644 index 00000000000..98896d5f598 --- /dev/null +++ b/src/services/code-index/vector-store/adapters/sqlite-vector.ts @@ -0,0 +1,478 @@ +import * as path from "path" +import { VectorDBAdapter, VectorDBConfig } from "./base" +import { PointStruct, VectorStoreSearchResult } from "../../interfaces/vector-store" +import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../../constants" +import { t } from "../../../../i18n" +import { getWorkspacePath } from "../../../../utils/path" + +// Dynamic imports for SQLite to handle optional dependency +let Database: any + +/** + * SQLite+Vector adapter for vector database operations + * Uses sqlite-vss extension for vector similarity search + */ +export class SQLiteVectorAdapter extends VectorDBAdapter { + private db: any + private dbPath: string + private initialized: boolean = false + private tableName: string + + constructor(config: VectorDBConfig) { + super(config) + // Store database in workspace directory + this.dbPath = path.join(config.workspacePath, ".roo-code-index.db") + // Use sanitized collection name for table + this.tableName = 
`vectors_${this.collectionName.replace(/[^a-zA-Z0-9_]/g, "_")}` + } + + get providerName(): string { + return "sqlite-vector" + } + + get requiresExternalService(): boolean { + return false // SQLite is embedded + } + + /** + * Dynamically import SQLite modules + */ + private async loadSQLite() { + if (!Database) { + try { + // @ts-ignore - Dynamic import for optional dependency + const sqliteModule = await import("better-sqlite3") + Database = sqliteModule.default + } catch (error) { + throw new Error( + t("embeddings:vectorStore.sqliteNotInstalled", { + errorMessage: error instanceof Error ? error.message : String(error), + }), + ) + } + } + } + + /** + * Load sqlite-vss extension + */ + private async loadVectorExtension() { + try { + // Load the vector extension + // @ts-ignore - Dynamic loading + const vssPath = require.resolve("sqlite-vss") + this.db.loadExtension(vssPath) + } catch (error) { + throw new Error( + t("embeddings:vectorStore.sqliteVssNotInstalled", { + errorMessage: error instanceof Error ? 
error.message : String(error), + }), + ) + } + } + + async initialize(): Promise { + try { + await this.loadSQLite() + + // Open database connection + this.db = new Database(this.dbPath) + + // Enable WAL mode for better concurrency + this.db.pragma("journal_mode = WAL") + + // Load vector extension + await this.loadVectorExtension() + + // Check if table exists + const tableExists = this.db + .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`) + .get(this.tableName) + + let created = false + + if (!tableExists) { + // Create tables + this.db.exec(` + CREATE TABLE IF NOT EXISTS ${this.tableName} ( + id TEXT PRIMARY KEY, + file_path TEXT NOT NULL, + code_chunk TEXT NOT NULL, + start_line INTEGER NOT NULL, + end_line INTEGER NOT NULL, + path_segments TEXT NOT NULL + ); + + CREATE INDEX IF NOT EXISTS idx_${this.tableName}_file_path + ON ${this.tableName}(file_path); + `) + + // Create virtual table for vector search + this.db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS ${this.tableName}_vss USING vss0( + vector(${this.vectorSize}) + ); + `) + + created = true + } else { + // Verify vector dimension + const vssInfo = this.db + .prepare(`SELECT sql FROM sqlite_master WHERE name = ?`) + .get(`${this.tableName}_vss`) + + if (vssInfo && vssInfo.sql) { + const dimensionMatch = vssInfo.sql.match(/vector\((\d+)\)/) + const existingDimension = dimensionMatch ? parseInt(dimensionMatch[1]) : 0 + + if (existingDimension !== this.vectorSize) { + // Dimension mismatch - recreate tables + console.warn( + `[SQLiteVectorAdapter] Table ${this.tableName} exists with vector size ${existingDimension}, but expected ${this.vectorSize}. 
Recreating tables.`, + ) + + // Drop existing tables + this.db.exec(` + DROP TABLE IF EXISTS ${this.tableName}_vss; + DROP TABLE IF EXISTS ${this.tableName}; + `) + + // Recreate tables + this.db.exec(` + CREATE TABLE ${this.tableName} ( + id TEXT PRIMARY KEY, + file_path TEXT NOT NULL, + code_chunk TEXT NOT NULL, + start_line INTEGER NOT NULL, + end_line INTEGER NOT NULL, + path_segments TEXT NOT NULL + ); + + CREATE INDEX idx_${this.tableName}_file_path + ON ${this.tableName}(file_path); + + CREATE VIRTUAL TABLE ${this.tableName}_vss USING vss0( + vector(${this.vectorSize}) + ); + `) + + created = true + } + } + } + + this.initialized = true + return created + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + console.error(`[SQLiteVectorAdapter] Failed to initialize SQLite:`, errorMessage) + throw new Error(t("embeddings:vectorStore.sqliteInitFailed", { errorMessage })) + } + } + + async upsertPoints(points: PointStruct[]): Promise { + if (!this.initialized || !this.db) { + throw new Error("SQLite not initialized") + } + + const insertStmt = this.db.prepare(` + INSERT OR REPLACE INTO ${this.tableName} + (id, file_path, code_chunk, start_line, end_line, path_segments) + VALUES (?, ?, ?, ?, ?, ?) + `) + + const insertVectorStmt = this.db.prepare(` + INSERT OR REPLACE INTO ${this.tableName}_vss + (rowid, vector) + VALUES ((SELECT rowid FROM ${this.tableName} WHERE id = ?), ?) + `) + + const transaction = this.db.transaction((points: PointStruct[]) => { + for (const point of points) { + // Build path segments + const pathSegments = point.payload?.filePath + ? 
point.payload.filePath + .split(path.sep) + .filter(Boolean) + .reduce((acc: Record, segment: string, index: number) => { + acc[index.toString()] = segment + return acc + }, {}) + : {} + + // Insert metadata + insertStmt.run( + point.id, + point.payload?.filePath || "", + point.payload?.codeChunk || "", + point.payload?.startLine || 0, + point.payload?.endLine || 0, + JSON.stringify(pathSegments), + ) + + // Insert vector + // Convert vector to blob format expected by sqlite-vss + const vectorBlob = Buffer.from(new Float32Array(point.vector).buffer) + insertVectorStmt.run(point.id, vectorBlob) + } + }) + + try { + transaction(points) + } catch (error) { + console.error("Failed to upsert points:", error) + throw error + } + } + + async search( + queryVector: number[], + directoryPrefix?: string, + minScore?: number, + maxResults?: number, + ): Promise { + if (!this.initialized || !this.db) { + throw new Error("SQLite not initialized") + } + + try { + // Convert query vector to blob + const queryBlob = Buffer.from(new Float32Array(queryVector).buffer) + + // Build base query + let query = ` + SELECT + t.id, + t.file_path, + t.code_chunk, + t.start_line, + t.end_line, + t.path_segments, + vss.distance + FROM ${this.tableName}_vss vss + INNER JOIN ${this.tableName} t ON t.rowid = vss.rowid + WHERE vss_search(vss.vector, ?) + ` + + const params: any[] = [queryBlob] + + // Add directory filter if provided + if (directoryPrefix) { + const segments = directoryPrefix.split(path.sep).filter(Boolean) + const conditions: string[] = [] + + segments.forEach((segment, index) => { + conditions.push(`json_extract(t.path_segments, '$."${index}"') = ?`) + params.push(segment) + }) + + if (conditions.length > 0) { + query += ` AND ${conditions.join(" AND ")}` + } + } + + // Add limit + query += ` LIMIT ?` + params.push(maxResults ?? 
DEFAULT_MAX_SEARCH_RESULTS) + + // Execute search + const stmt = this.db.prepare(query) + const results = stmt.all(...params) + + // Transform results + return results + .map((row: any) => { + // Convert distance to similarity score + // SQLite-vss returns L2 distance, convert to cosine similarity + const distance = row.distance || 0 + const score = 1 / (1 + distance) + + // Skip results below minimum score + if (score < (minScore ?? DEFAULT_SEARCH_MIN_SCORE)) { + return null + } + + // Parse path segments + let pathSegments = {} + try { + pathSegments = JSON.parse(row.path_segments || "{}") + } catch { + // Ignore parse errors + } + + const payload = { + filePath: row.file_path, + codeChunk: row.code_chunk, + startLine: row.start_line, + endLine: row.end_line, + pathSegments, + } + + if (this.isPayloadValid(payload)) { + return { + id: row.id, + score, + payload, + } + } + return null + }) + .filter((result: VectorStoreSearchResult | null): result is VectorStoreSearchResult => result !== null) + } catch (error) { + console.error("Failed to search points:", error) + throw error + } + } + + async deletePointsByFilePath(filePath: string): Promise { + return this.deletePointsByMultipleFilePaths([filePath]) + } + + async deletePointsByMultipleFilePaths(filePaths: string[]): Promise { + if (!this.initialized || !this.db) { + throw new Error("SQLite not initialized") + } + + if (filePaths.length === 0) { + return + } + + try { + const workspaceRoot = getWorkspacePath() + const normalizedPaths = filePaths.map((filePath) => { + const absolutePath = path.resolve(workspaceRoot, filePath) + return path.normalize(absolutePath) + }) + + // Delete from both tables + const deleteStmt = this.db.prepare(` + DELETE FROM ${this.tableName} WHERE file_path = ? + `) + + const deleteVectorStmt = this.db.prepare(` + DELETE FROM ${this.tableName}_vss + WHERE rowid IN ( + SELECT rowid FROM ${this.tableName} WHERE file_path = ? 
+ ) + `) + + const transaction = this.db.transaction((paths: string[]) => { + for (const normalizedPath of paths) { + deleteVectorStmt.run(normalizedPath) + deleteStmt.run(normalizedPath) + } + }) + + transaction(normalizedPaths) + } catch (error) { + console.error("Failed to delete points by file paths:", error) + throw error + } + } + + async deleteCollection(): Promise { + try { + if (this.db) { + // Drop tables + this.db.exec(` + DROP TABLE IF EXISTS ${this.tableName}_vss; + DROP TABLE IF EXISTS ${this.tableName}; + `) + + // Close database + this.db.close() + this.db = null + this.initialized = false + } + } catch (error) { + console.error(`[SQLiteVectorAdapter] Failed to delete collection ${this.collectionName}:`, error) + throw error + } + } + + async clearCollection(): Promise { + if (!this.initialized || !this.db) { + throw new Error("SQLite not initialized") + } + + try { + // Delete all records from both tables + this.db.exec(` + DELETE FROM ${this.tableName}_vss; + DELETE FROM ${this.tableName}; + `) + } catch (error) { + console.error("Failed to clear collection:", error) + throw error + } + } + + async collectionExists(): Promise { + try { + if (!this.db) { + await this.loadSQLite() + this.db = new Database(this.dbPath) + } + + const tableExists = this.db + .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`) + .get(this.tableName) + + return !!tableExists + } catch { + return false + } + } + + async validateConfiguration(): Promise<{ valid: boolean; error?: string }> { + try { + // Try to load SQLite + await this.loadSQLite() + + // Try to create a test database + const testDb = new Database(":memory:") + + // Try to load vector extension + try { + // @ts-ignore + const vssPath = require.resolve("sqlite-vss") + testDb.loadExtension(vssPath) + } catch (error) { + testDb.close() + throw new Error( + t("embeddings:vectorStore.sqliteVssNotInstalled", { + errorMessage: error instanceof Error ? 
error.message : String(error), + }), + ) + } + + // Test vector operations + testDb.exec(`CREATE VIRTUAL TABLE test_vss USING vss0(vector(3))`) + testDb.close() + + return { valid: true } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + + if (errorMessage.includes("Cannot find module")) { + return { + valid: false, + error: t("embeddings:vectorStore.sqliteNotInstalled", { errorMessage }), + } + } + + return { + valid: false, + error: t("embeddings:vectorStore.sqliteInitFailed", { errorMessage }), + } + } + } + + getConfigurationRequirements() { + return { + required: ["vectorSize", "workspacePath"], + optional: [], + defaults: {}, + } + } +} diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index cfd5b042868..383a99c244a 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -71,10 +71,20 @@ "selectModelPlaceholder": "Select model", "ollamaUrlLabel": "Ollama URL:", "ollamaBaseUrlLabel": "Ollama Base URL", + "vectorDBProviderLabel": "Vector Database", + "vectorDBProviderDescription": "Choose the vector database to use for storing code embeddings", + "qdrantProvider": "Qdrant", + "lancedbProvider": "LanceDB (Embedded)", + "chromadbProvider": "ChromaDB", + "sqliteVectorProvider": "SQLite + Vector", "qdrantUrlLabel": "Qdrant URL", "qdrantKeyLabel": "Qdrant Key:", "qdrantApiKeyLabel": "Qdrant API Key", "qdrantApiKeyPlaceholder": "Enter your Qdrant API key (optional)", + "chromadbUrlLabel": "ChromaDB URL", + "chromadbUrlPlaceholder": "http://localhost:8000", + "chromadbApiKeyLabel": "ChromaDB API Key", + "chromadbApiKeyPlaceholder": "Enter your ChromaDB API key (optional)", "setupConfigLabel": "Setup", "advancedConfigLabel": "Advanced Configuration", "searchMinScoreLabel": "Search Score Threshold",