diff --git a/README.md b/README.md index 2916f20..97d6ff3 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,12 @@ Model Context Protocol (MCP) allows you to integrate Claude Context with your fa ### Prerequisites +**🚀 New: Zero-Config Local Mode with FAISS** + +You can now use Context Please with **no external database required**! Simply provide an OpenAI API key, and FAISS will handle local storage automatically. Perfect for getting started quickly or working with small-to-medium codebases. + +For production deployments or large codebases, consider using Zilliz Cloud or Qdrant: +
Get a free vector database on Zilliz Cloud 👈 @@ -62,7 +68,19 @@ Copy your key and use it in the configuration examples below as `your-openai-api #### Configuration -Use the command line interface to add the Claude Context MCP server: +**Option 1: Local Mode with FAISS (Recommended for Getting Started)** + +The simplest way to get started - no external database required: + +```bash +claude mcp add context-please \ + -e OPENAI_API_KEY=sk-your-openai-api-key \ + -- npx @pleaseai/context-please-mcp@latest +``` + +**Option 2: Cloud Mode with Zilliz (For Production/Large Codebases)** + +For larger codebases or production deployments: ```bash claude mcp add context-please \ @@ -483,6 +501,71 @@ npx @pleaseai/context-please-mcp@latest For more detailed MCP environment variable configuration, see our [Environment Variables Guide](docs/getting-started/environment-variables.md). +### Using FAISS for Local-Only Deployments + +**Context Please** now supports FAISS as a zero-configuration, local-only vector database option! This is perfect for: + +- 🚀 **Quick Start**: No external database setup required +- 💻 **Local Development**: All data stays on your machine +- 💰 **Zero Cost**: No cloud services or infrastructure costs +- 📦 **Small-to-Medium Codebases**: Ideal for personal projects and teams + +#### Quick Start with FAISS + +Simply omit the Milvus/Qdrant configuration, and Context Please will automatically use FAISS: + +```bash +claude mcp add context-please \ + -e OPENAI_API_KEY=sk-your-openai-api-key \ + -- npx @pleaseai/context-please-mcp@latest +``` + +That's it! Your code will be indexed to `~/.context/faiss-indexes/` automatically. 
+ +#### Advanced FAISS Configuration + +You can customize the storage directory: + +```bash +claude mcp add context-please \ + -e OPENAI_API_KEY=sk-your-openai-api-key \ + -e FAISS_STORAGE_DIR=/path/to/your/indexes \ + -- npx @pleaseai/context-please-mcp@latest +``` + +Or explicitly specify FAISS as the vector database: + +```json +{ + "mcpServers": { + "context-please": { + "command": "npx", + "args": ["@pleaseai/context-please-mcp@latest"], + "env": { + "OPENAI_API_KEY": "your-openai-api-key", + "VECTOR_DB_TYPE": "faiss-local", + "FAISS_STORAGE_DIR": "~/.context/faiss-indexes" + } + } + } +} +``` + +#### FAISS Features + +- ✅ **Hybrid Search**: Combines dense (semantic) + sparse (BM25) vectors +- ✅ **File-based Persistence**: Indexes saved as `.index` files +- ✅ **Auto-selection**: Defaults to FAISS when no external DB configured +- ✅ **Same Interface**: Compatible with all existing tools and APIs + +#### Limitations + +- ⚠️ **Memory**: Entire index loads into RAM (suitable for ~100K files) +- ⚠️ **Concurrency**: Single-process file access +- ⚠️ **Scalability**: For larger codebases, consider Milvus or Qdrant + +For production deployments or large codebases (>100K files), we recommend using [Milvus](https://milvus.io) or [Qdrant](https://qdrant.tech). + ### Using Different Embedding Models To configure custom embedding models (e.g., `text-embedding-3-large` for OpenAI, `voyage-code-3` for VoyageAI), see the [MCP Configuration Examples](packages/mcp/README.md#embedding-provider-configuration) for detailed setup instructions for each provider. diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index c84af6b..c4d3cfd 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -637,8 +637,8 @@ export class Context { // Warn if we hit the query limit - actual collection may be larger if (totalChunks === 100000) { console.warn( - `[Context] ⚠️ Retrieved maximum limit of 100k chunks for ${codebasePath}. 
` + - `Actual total may be higher. Stats may be incomplete.`, + `[Context] ⚠️ Retrieved maximum limit of 100k chunks for ${codebasePath}. ` + + `Actual total may be higher. Stats may be incomplete.`, ) } @@ -659,11 +659,11 @@ export class Context { // Log with full context for debugging console.error( - `[Context] ❌ Failed to retrieve collection stats\n` + - ` Codebase: ${codebasePath}\n` + - ` Collection: ${collectionName}\n` + - ` Database: ${dbType}\n` + - ` Error:`, + `[Context] ❌ Failed to retrieve collection stats\n` + + ` Codebase: ${codebasePath}\n` + + ` Collection: ${collectionName}\n` + + ` Database: ${dbType}\n` + + ` Error:`, error, ) @@ -673,9 +673,9 @@ export class Context { // Known recoverable errors that should return null if ( - errorMsg.includes('collection not loaded') || - errorMsg.includes('collection not exist') || - errorMsg.includes('Failed to query') + errorMsg.includes('collection not loaded') + || errorMsg.includes('collection not exist') + || errorMsg.includes('Failed to query') ) { console.warn(`[Context] ⚠️ Collection exists but query failed (recoverable): ${errorMsg}`) return null diff --git a/packages/core/src/splitter/ast-splitter.ts b/packages/core/src/splitter/ast-splitter.ts index a1d34b2..e530182 100644 --- a/packages/core/src/splitter/ast-splitter.ts +++ b/packages/core/src/splitter/ast-splitter.ts @@ -1,8 +1,8 @@ import Parser from 'tree-sitter' // Language parsers const JavaScript = require('tree-sitter-javascript') -import { CodeChunk, Splitter } from './index' +import { CodeChunk, Splitter } from './index' import { LangChainCodeSplitter } from './langchain-splitter' const TypeScript = require('tree-sitter-typescript').typescript const CSharp = require('tree-sitter-c-sharp') diff --git a/packages/core/src/vectordb/factory.ts b/packages/core/src/vectordb/factory.ts index 558a7c2..c425c26 100644 --- a/packages/core/src/vectordb/factory.ts +++ b/packages/core/src/vectordb/factory.ts @@ -1,3 +1,4 @@ +import type { FaissConfig 
} from './faiss-vectordb' import type { MilvusRestfulConfig } from './milvus-restful-vectordb' import type { MilvusConfig } from './milvus-vectordb' import type { QdrantConfig } from './qdrant-vectordb' @@ -6,6 +7,43 @@ import { MilvusRestfulVectorDatabase } from './milvus-restful-vectordb' import { MilvusVectorDatabase } from './milvus-vectordb' import { QdrantVectorDatabase } from './qdrant-vectordb' +// FAISS is optional - may not be available in all environments (e.g., CI without native bindings) +// Use lazy loading to avoid import errors +let FaissVectorDatabase: any +let faissAvailable: boolean | null = null // null = not checked yet +let faissCheckError: string | null = null + +function checkFaissAvailability(): boolean { + if (faissAvailable !== null) { + return faissAvailable + } + + try { + FaissVectorDatabase = require('./faiss-vectordb').FaissVectorDatabase + faissAvailable = true + return true + } + catch (error: any) { + const errorMsg = error.message || String(error) + + // Check if it's a FAISS bindings error (allow FAISS to be unavailable) + if (errorMsg.includes('Could not locate the bindings file') + || errorMsg.includes('faiss-node')) { + faissAvailable = false + faissCheckError = 'FAISS native bindings not available' + console.warn('[VectorDatabaseFactory] FAISS native bindings not available. 
FAISS support disabled.') + return false + } + + // For other errors (e.g., missing file during tests), also mark as unavailable + // but don't throw to allow tests to run + faissAvailable = false + faissCheckError = errorMsg + console.warn(`[VectorDatabaseFactory] FAISS unavailable: ${errorMsg}`) + return false + } +} + /** * Supported vector database types */ @@ -28,6 +66,13 @@ export enum VectorDatabaseType { * Supports both self-hosted and Qdrant Cloud */ QDRANT_GRPC = 'qdrant-grpc', + + /** + * FAISS local file-based vector database + * Use for local-only deployments with zero configuration + * Ideal for development and small-to-medium codebases + */ + FAISS_LOCAL = 'faiss-local', } /** @@ -37,6 +82,7 @@ export interface VectorDatabaseConfig { [VectorDatabaseType.MILVUS_GRPC]: MilvusConfig [VectorDatabaseType.MILVUS_RESTFUL]: MilvusRestfulConfig [VectorDatabaseType.QDRANT_GRPC]: QdrantConfig + [VectorDatabaseType.FAISS_LOCAL]: FaissConfig } /** @@ -77,6 +123,12 @@ export class VectorDatabaseFactory { * VectorDatabaseType.QDRANT_GRPC, * { address: 'localhost:6334', apiKey: 'xxx' } * ); + * + * // Create FAISS local database + * const faissDb = VectorDatabaseFactory.create( + * VectorDatabaseType.FAISS_LOCAL, + * { storageDir: '~/.context/faiss-indexes' } + * ); * ``` */ static create( @@ -93,6 +145,16 @@ export class VectorDatabaseFactory { case VectorDatabaseType.QDRANT_GRPC: return new QdrantVectorDatabase(config as QdrantConfig) + case VectorDatabaseType.FAISS_LOCAL: + if (!checkFaissAvailability()) { + throw new Error( + `FAISS vector database is not available. ${faissCheckError || 'Native bindings could not be loaded'}. ` + + 'This usually happens in environments without C++ build tools. 
' + + 'Please use another vector database type (MILVUS_GRPC, MILVUS_RESTFUL, or QDRANT_GRPC).', + ) + } + return new FaissVectorDatabase(config as FaissConfig) + default: throw new Error(`Unsupported database type: ${type}`) } @@ -100,8 +162,20 @@ export class VectorDatabaseFactory { /** * Get all supported database types + * Note: FAISS may not be available if native bindings are missing */ static getSupportedTypes(): VectorDatabaseType[] { - return Object.values(VectorDatabaseType) + const types = Object.values(VectorDatabaseType) + if (!checkFaissAvailability()) { + return types.filter(t => t !== VectorDatabaseType.FAISS_LOCAL) + } + return types + } + + /** + * Check if FAISS is available in the current environment + */ + static isFaissAvailable(): boolean { + return checkFaissAvailability() } } diff --git a/packages/core/src/vectordb/faiss-vectordb.ts b/packages/core/src/vectordb/faiss-vectordb.ts new file mode 100644 index 0000000..0a720f4 --- /dev/null +++ b/packages/core/src/vectordb/faiss-vectordb.ts @@ -0,0 +1,897 @@ +import * as os from 'node:os' +import * as path from 'node:path' +import { IndexFlatL2 } from 'faiss-node' +import * as fs from 'fs-extra' +import { BaseDatabaseConfig, BaseVectorDatabase } from './base/base-vector-database' +import { BM25Config, SimpleBM25 } from './sparse/simple-bm25' +import { + HybridSearchOptions, + HybridSearchRequest, + HybridSearchResult, + SearchOptions, + VectorDocument, + VectorSearchResult, +} from './types' + +export interface FaissConfig extends BaseDatabaseConfig { + /** + * Storage directory for FAISS indexes + * @default ~/.context/faiss-indexes + */ + storageDir?: string + + /** + * BM25 configuration for sparse vector generation + */ + bm25Config?: BM25Config +} + +interface CollectionMetadata { + name: string + dimension: number + isHybrid: boolean + documentCount: number + createdAt: string +} + +interface DocumentMetadata { + id: string + content: string + relativePath: string + startLine: number + 
 endLine: number + fileExtension: string + metadata: Record +} + +/** + * FAISS Vector Database implementation for local-only deployments + * + * Features: + * - Zero-configuration file-based storage + * - Hybrid search with BM25 sparse vectors + * - RRF (Reciprocal Rank Fusion) reranking + * - Perfect for local development and small-to-medium codebases + * + * Architecture: + * - Dense vectors: Stored in FAISS IndexFlatL2 (L2 distance) + * - Sparse vectors: Generated using SimpleBM25 for keyword matching + * - Hybrid search: Combines both using RRF fusion + * + * Storage structure: + * ~/.context/faiss-indexes/ + * └── {collection_name}/ + * ├── dense.index # FAISS index file + * ├── sparse.json # BM25 model (vocabulary, IDF) + * └── metadata.json # Collection metadata (document records are stored in documents.json) + */ +export class FaissVectorDatabase extends BaseVectorDatabase { + private collections: Map + bm25?: SimpleBM25 + }> = new Map() + + constructor(config: FaissConfig) { + // Set storageDir default before calling super(), which triggers initialize() + const configWithDefaults: FaissConfig = { + ...config, + storageDir: config.storageDir || path.join(os.homedir(), '.context', 'faiss-indexes'), + } + super(configWithDefaults) + } + + /** + * Get storage directory (lazily computed from config) + */ + private get storageDir(): string { + return this.config.storageDir! + } + + /** + * Initialize FAISS storage directory + */ + protected async initialize(): Promise { + try { + console.log('[FaissDB] 🔧 Initializing FAISS storage at:', this.storageDir) + await fs.ensureDir(this.storageDir) + console.log('[FaissDB] ✅ FAISS storage initialized') + } + catch (error: any) { + const errorMsg = `Failed to initialize FAISS storage at ${this.storageDir}: ${error.message}` + console.error(`[FaissDB] ❌ ${errorMsg}`) + console.error(`[FaissDB] Error code: ${error.code || 'UNKNOWN'}`) + + if (error.code === 'EACCES') { + throw new Error(`${errorMsg}\nPermission denied. 
Check directory permissions.`) + } + else if (error.code === 'ENOSPC') { + throw new Error(`${errorMsg}\nDisk space exhausted. Free up disk space and try again.`) + } + else if (error.code === 'ENOENT') { + throw new Error(`${errorMsg}\nParent directory does not exist.`) + } + else { + throw new Error(errorMsg) + } + } + } + + /** + * FAISS indexes are loaded on-demand when accessed + */ + protected async ensureLoaded(collectionName: string): Promise { + if (this.collections.has(collectionName)) { + return + } + + const collectionPath = this.getCollectionPath(collectionName) + if (!(await fs.pathExists(collectionPath))) { + throw new Error(`Collection ${collectionName} does not exist`) + } + + await this.loadCollection(collectionName) + } + + /** + * Get collection storage path + */ + private getCollectionPath(collectionName: string): string { + return path.join(this.storageDir, collectionName) + } + + /** + * Load collection from disk + */ + private async loadCollection(collectionName: string): Promise { + const collectionPath = this.getCollectionPath(collectionName) + + console.log('[FaissDB] 📂 Loading collection:', collectionName) + + try { + // Load metadata + const metadataPath = path.join(collectionPath, 'metadata.json') + let metadata: CollectionMetadata + try { + metadata = await fs.readJson(metadataPath) + } + catch (error: any) { + throw new Error( + `Failed to load collection metadata from ${metadataPath}: ${error.message}. ` + + `The metadata file may be corrupted. Try re-indexing the collection.`, + ) + } + + // Load FAISS index + const indexPath = path.join(collectionPath, 'dense.index') + let index: IndexFlatL2 + try { + index = IndexFlatL2.read(indexPath) + } + catch (error: any) { + throw new Error( + `Failed to load FAISS index from ${indexPath}: ${error.message}. ` + + `The index file may be corrupted. 
Try re-indexing the collection.`, + ) + } + + // Load documents + const documentsPath = path.join(collectionPath, 'documents.json') + let documentsArray: DocumentMetadata[] + try { + documentsArray = await fs.readJson(documentsPath) + } + catch (error: any) { + throw new Error( + `Failed to load documents metadata from ${documentsPath}: ${error.message}. ` + + `The documents file may be corrupted. Try re-indexing the collection.`, + ) + } + const documents = new Map(documentsArray.map((doc) => [doc.id, doc])) + + // Load BM25 model if hybrid collection + let bm25: SimpleBM25 | undefined + if (metadata.isHybrid) { + const bm25Path = path.join(collectionPath, 'sparse.json') + try { + const bm25Json = await fs.readFile(bm25Path, 'utf-8') + bm25 = SimpleBM25.fromJSON(bm25Json) + } + catch (error: any) { + throw new Error( + `Failed to load BM25 model from ${bm25Path}: ${error.message}. ` + + `The BM25 file may be corrupted. Try re-indexing the collection.`, + ) + } + } + + this.collections.set(collectionName, { + index, + metadata, + documents, + bm25, + }) + + console.log('[FaissDB] ✅ Loaded collection:', collectionName) + console.log('[FaissDB] 📊 Document count:', documents.size) + } + catch (error: any) { + console.error(`[FaissDB] ❌ Failed to load collection ${collectionName}:`, error.message) + throw error + } + } + + /** + * Save collection to disk + */ + private async saveCollection(collectionName: string): Promise { + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not found in memory`) + } + + const collectionPath = this.getCollectionPath(collectionName) + + try { + await fs.ensureDir(collectionPath) + } + catch (error: any) { + const errorMsg = `Failed to create collection directory ${collectionPath}: ${error.message}` + console.error(`[FaissDB] ❌ ${errorMsg}`) + throw new Error(errorMsg) + } + + try { + // Save FAISS index + const indexPath = path.join(collectionPath, 'dense.index') 
+ try { + collection.index.write(indexPath) + } + catch (error: any) { + throw new Error(`Failed to write FAISS index to ${indexPath}: ${error.message}`) + } + + // Save metadata + const metadataPath = path.join(collectionPath, 'metadata.json') + try { + await fs.writeJson(metadataPath, collection.metadata, { spaces: 2 }) + } + catch (error: any) { + throw new Error(`Failed to write metadata to ${metadataPath}: ${error.message}`) + } + + // Save documents + const documentsPath = path.join(collectionPath, 'documents.json') + const documentsArray = Array.from(collection.documents.values()) + try { + await fs.writeJson(documentsPath, documentsArray, { spaces: 2 }) + } + catch (error: any) { + throw new Error(`Failed to write documents to ${documentsPath}: ${error.message}`) + } + + // Save BM25 model if hybrid collection + if (collection.bm25 && collection.metadata.isHybrid) { + const bm25Path = path.join(collectionPath, 'sparse.json') + try { + const bm25Json = collection.bm25.toJSON() + await fs.writeFile(bm25Path, bm25Json, 'utf-8') + } + catch (error: any) { + throw new Error(`Failed to write BM25 model to ${bm25Path}: ${error.message}`) + } + } + + console.log('[FaissDB] 💾 Saved collection:', collectionName) + } + catch (error: any) { + console.error(`[FaissDB] ❌ Failed to save collection ${collectionName}:`, error.message) + console.error(`[FaissDB] Collection may be in an inconsistent state. 
Consider re-indexing.`) + throw error + } + } + + /** + * Create collection with dense vectors only + */ + async createCollection(collectionName: string, dimension: number, description?: string): Promise { + await this.ensureInitialized() + + if (this.collections.has(collectionName)) { + throw new Error(`Collection ${collectionName} already exists`) + } + + const collectionPath = this.getCollectionPath(collectionName) + if (await fs.pathExists(collectionPath)) { + throw new Error(`Collection ${collectionName} already exists on disk`) + } + + console.log('[FaissDB] 🔧 Creating collection:', collectionName) + console.log('[FaissDB] 📏 Vector dimension:', dimension) + + // Create FAISS index + const index = new IndexFlatL2(dimension) + + // Create metadata + const metadata: CollectionMetadata = { + name: collectionName, + dimension, + isHybrid: false, + documentCount: 0, + createdAt: new Date().toISOString(), + } + + this.collections.set(collectionName, { + index, + metadata, + documents: new Map(), + }) + + await this.saveCollection(collectionName) + console.log('[FaissDB] ✅ Collection created:', collectionName) + } + + /** + * Create collection with hybrid search support (dense + sparse vectors) + */ + async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise { + await this.ensureInitialized() + + if (this.collections.has(collectionName)) { + throw new Error(`Collection ${collectionName} already exists`) + } + + const collectionPath = this.getCollectionPath(collectionName) + if (await fs.pathExists(collectionPath)) { + throw new Error(`Collection ${collectionName} already exists on disk`) + } + + console.log('[FaissDB] 🔧 Creating hybrid collection:', collectionName) + console.log('[FaissDB] 📏 Vector dimension:', dimension) + + // Create FAISS index + const index = new IndexFlatL2(dimension) + + // Create BM25 generator + const bm25 = new SimpleBM25(this.config.bm25Config) + + // Create metadata + const metadata: 
CollectionMetadata = { + name: collectionName, + dimension, + isHybrid: true, + documentCount: 0, + createdAt: new Date().toISOString(), + } + + this.collections.set(collectionName, { + index, + metadata, + documents: new Map(), + bm25, + }) + + await this.saveCollection(collectionName) + console.log('[FaissDB] ✅ Hybrid collection created:', collectionName) + } + + /** + * Drop collection + */ + async dropCollection(collectionName: string): Promise { + await this.ensureInitialized() + + console.log('[FaissDB] 🗑️ Dropping collection:', collectionName) + + // Remove from memory + this.collections.delete(collectionName) + + // Remove from disk + const collectionPath = this.getCollectionPath(collectionName) + if (await fs.pathExists(collectionPath)) { + await fs.remove(collectionPath) + } + + console.log('[FaissDB] ✅ Collection dropped:', collectionName) + } + + /** + * Check if collection exists + */ + async hasCollection(collectionName: string): Promise { + await this.ensureInitialized() + + // Check memory first + if (this.collections.has(collectionName)) { + return true + } + + // Check disk + const collectionPath = this.getCollectionPath(collectionName) + return await fs.pathExists(collectionPath) + } + + /** + * List all collections + */ + async listCollections(): Promise { + await this.ensureInitialized() + + const collections: string[] = [] + + // Read from storage directory + if (await fs.pathExists(this.storageDir)) { + const entries = await fs.readdir(this.storageDir, { withFileTypes: true }) + for (const entry of entries) { + if (entry.isDirectory()) { + collections.push(entry.name) + } + } + } + + return collections + } + + /** + * Insert vector documents (dense only) + */ + async insert(collectionName: string, documents: VectorDocument[]): Promise { + await this.ensureInitialized() + await this.ensureLoaded(collectionName) + + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not 
found`) + } + + console.log('[FaissDB] 📝 Inserting documents:', documents.length) + + // Validate vector dimensions + const expectedDim = collection.metadata.dimension + for (const doc of documents) { + if (doc.vector.length !== expectedDim) { + throw new Error( + `Vector dimension mismatch for document '${doc.id}': ` + + `expected ${expectedDim}, got ${doc.vector.length}`, + ) + } + } + + // Add vectors to FAISS index one at a time + documents.forEach((doc) => { + collection.index.add(doc.vector) + }) + + // Store document metadata + documents.forEach((doc) => { + collection.documents.set(doc.id, { + id: doc.id, + content: doc.content, + relativePath: doc.relativePath, + startLine: doc.startLine, + endLine: doc.endLine, + fileExtension: doc.fileExtension, + metadata: doc.metadata, + }) + }) + + // Update metadata + collection.metadata.documentCount = collection.documents.size + + await this.saveCollection(collectionName) + console.log('[FaissDB] ✅ Inserted documents:', documents.length) + } + + /** + * Insert hybrid vector documents (dense + sparse) + */ + async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise { + await this.ensureInitialized() + await this.ensureLoaded(collectionName) + + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not found`) + } + + if (!collection.metadata.isHybrid || !collection.bm25) { + throw new Error(`Collection ${collectionName} is not a hybrid collection`) + } + + console.log('[FaissDB] 📝 Inserting hybrid documents:', documents.length) + + // Validate vector dimensions + const expectedDim = collection.metadata.dimension + for (const doc of documents) { + if (doc.vector.length !== expectedDim) { + throw new Error( + `Vector dimension mismatch for document '${doc.id}': ` + + `expected ${expectedDim}, got ${doc.vector.length}`, + ) + } + } + + // Train BM25 on all documents (including new ones) + const allDocuments = 
[...collection.documents.values(), ...documents] + const allContents = allDocuments.map((doc) => doc.content) + collection.bm25.learn(allContents) + + // Add vectors to FAISS index one at a time + documents.forEach((doc) => { + collection.index.add(doc.vector) + }) + + // Store document metadata + documents.forEach((doc) => { + collection.documents.set(doc.id, { + id: doc.id, + content: doc.content, + relativePath: doc.relativePath, + startLine: doc.startLine, + endLine: doc.endLine, + fileExtension: doc.fileExtension, + metadata: doc.metadata, + }) + }) + + // Update metadata + collection.metadata.documentCount = collection.documents.size + + await this.saveCollection(collectionName) + console.log('[FaissDB] ✅ Inserted hybrid documents:', documents.length) + } + + /** + * Search similar vectors (dense search only) + */ + async search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise { + await this.ensureInitialized() + await this.ensureLoaded(collectionName) + + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not found`) + } + + // FAISS requires topK <= ntotal (number of vectors in index) + const ntotal = collection.index.ntotal() + if (ntotal === 0) { + console.log('[FaissDB] 🔍 Empty collection, returning no results') + return [] + } + + const requestedTopK = options?.topK || 10 + const topK = Math.min(requestedTopK, ntotal) + + console.log('[FaissDB] 🔍 Searching vectors, topK:', topK, '(requested:', requestedTopK, ', ntotal:', ntotal, ')') + + // Search FAISS index + const results = collection.index.search(queryVector, topK) + + // Convert to VectorSearchResult + const searchResults: VectorSearchResult[] = [] + const documentsArray = Array.from(collection.documents.values()) + + for (let i = 0; i < results.labels.length; i++) { + const idx = results.labels[i] + const distance = results.distances[i] + + if (idx >= 0 && idx < documentsArray.length) { + 
const doc = documentsArray[idx] + + // Convert L2 distance to cosine similarity score + // Lower distance = higher similarity + const score = 1 / (1 + distance) + + // Apply threshold filter if specified + if (options?.threshold !== undefined && score < options.threshold) { + continue + } + + searchResults.push({ + document: { + id: doc.id, + vector: [], // Vector not needed in results + content: doc.content, + relativePath: doc.relativePath, + startLine: doc.startLine, + endLine: doc.endLine, + fileExtension: doc.fileExtension, + metadata: doc.metadata, + }, + score, + }) + } + } + + console.log('[FaissDB] ✅ Found results:', searchResults.length) + return searchResults + } + + /** + * Hybrid search with multiple vector fields (dense + sparse) + */ + async hybridSearch( + collectionName: string, + searchRequests: HybridSearchRequest[], + options?: HybridSearchOptions, + ): Promise { + await this.ensureInitialized() + await this.ensureLoaded(collectionName) + + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not found`) + } + + if (!collection.metadata.isHybrid || !collection.bm25) { + throw new Error(`Collection ${collectionName} is not a hybrid collection`) + } + + const limit = options?.limit || 10 + + console.log('[FaissDB] 🔍 Hybrid search, requests:', searchRequests.length) + + // FAISS requires topK <= ntotal + const ntotal = collection.index.ntotal() + + // Separate dense and sparse search requests + const denseResults: Map = new Map() + const sparseResults: Map = new Map() + + for (const request of searchRequests) { + if (request.anns_field === 'vector' || request.anns_field === 'dense') { + // Dense search + if (ntotal === 0) { + continue // Skip dense search on empty index + } + + const queryVector = request.data as number[] + const topK = Math.min(limit * 2, ntotal) + const results = collection.index.search(queryVector, topK) + + const documentsArray = 
Array.from(collection.documents.values()) + for (let i = 0; i < results.labels.length; i++) { + const idx = results.labels[i] + const distance = results.distances[i] + + if (idx >= 0 && idx < documentsArray.length) { + const doc = documentsArray[idx] + const score = 1 / (1 + distance) + denseResults.set(doc.id, score) + } + } + } + else if (request.anns_field === 'sparse' || request.anns_field === 'sparse_vector') { + // Sparse search using BM25 + const queryText = request.data as string + + // Score all documents + const documentsArray = Array.from(collection.documents.values()) + for (const doc of documentsArray) { + const sparseVector = collection.bm25.generate(doc.content) + const queryVector = collection.bm25.generate(queryText) + + // Calculate dot product of sparse vectors + let score = 0 + const queryMap = new Map() + for (let i = 0; i < queryVector.indices.length; i++) { + queryMap.set(queryVector.indices[i], queryVector.values[i]) + } + + for (let i = 0; i < sparseVector.indices.length; i++) { + const idx = sparseVector.indices[i] + const val = sparseVector.values[i] + const queryVal = queryMap.get(idx) + if (queryVal !== undefined) { + score += val * queryVal + } + } + + if (score > 0) { + sparseResults.set(doc.id, score) + } + } + } + } + + // Apply RRF (Reciprocal Rank Fusion) reranking + const rrfResults = this.applyRRF(collectionName, denseResults, sparseResults, options) + + console.log('[FaissDB] ✅ Hybrid search results:', rrfResults.length) + return rrfResults.slice(0, limit) + } + + /** + * Apply Reciprocal Rank Fusion (RRF) reranking + */ + private applyRRF( + collectionName: string, + denseResults: Map, + sparseResults: Map, + options?: HybridSearchOptions, + ): HybridSearchResult[] { + const k = options?.rerank?.params?.k || 60 + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not found`) + } + + // Combine all document IDs + const allDocIds = new 
Set([...denseResults.keys(), ...sparseResults.keys()]) + + // Calculate RRF scores + const rrfScores = new Map() + + for (const docId of allDocIds) { + let rrfScore = 0 + + // Add dense rank contribution + const denseScore = denseResults.get(docId) + if (denseScore !== undefined) { + // Convert score to rank (higher score = lower rank number) + const denseRank = Array.from(denseResults.entries()) + .sort((a, b) => b[1] - a[1]) + .findIndex(([id]) => id === docId) + 1 + rrfScore += 1 / (k + denseRank) + } + + // Add sparse rank contribution + const sparseScore = sparseResults.get(docId) + if (sparseScore !== undefined) { + const sparseRank = Array.from(sparseResults.entries()) + .sort((a, b) => b[1] - a[1]) + .findIndex(([id]) => id === docId) + 1 + rrfScore += 1 / (k + sparseRank) + } + + rrfScores.set(docId, rrfScore) + } + + // Sort by RRF score and convert to results + const sortedResults = Array.from(rrfScores.entries()) + .sort((a, b) => b[1] - a[1]) + + const results: HybridSearchResult[] = [] + for (const [docId, score] of sortedResults) { + const doc = collection.documents.get(docId) + if (doc) { + results.push({ + document: { + id: doc.id, + vector: [], + content: doc.content, + relativePath: doc.relativePath, + startLine: doc.startLine, + endLine: doc.endLine, + fileExtension: doc.fileExtension, + metadata: doc.metadata, + }, + score, + }) + } + } + + return results + } + + /** + * Delete documents by IDs + * + * ⚠️ NOT IMPLEMENTED: FAISS does not support document deletion + * + * The FAISS IndexFlatL2 library does not provide a way to remove vectors + * from an existing index. To fully remove documents, you must: + * + * 1. Drop the collection using dropCollection() + * 2. Recreate it using createCollection() or createHybridCollection() + * 3. 
Re-insert all documents except the ones you want to delete + * + * @throws Error Always throws - deletion is not supported + * @param collectionName Collection name + * @param ids Document IDs to delete (not used) + */ + async delete(collectionName: string, ids: string[]): Promise { + await this.ensureInitialized() + await this.ensureLoaded(collectionName) + + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not found`) + } + + console.error(`[FaissDB] ❌ FAISS does not support document deletion`) + console.error(`[FaissDB] ❌ Attempted to delete ${ids.length} document(s) from collection '${collectionName}'`) + + throw new Error( + `FAISS does not support document deletion. ` + + `To remove documents from collection '${collectionName}', you must:\n` + + ` 1. Drop the collection using dropCollection()\n` + + ` 2. Recreate it using createCollection() or createHybridCollection()\n` + + ` 3. Re-insert all documents except the ones you want to delete\n\n` + + `Attempted to delete document IDs: ${ids.join(', ')}`, + ) + } + + /** + * Query documents with filter conditions + * + * ⚠️ LIMITATION: Filter parameter is currently ignored + * + * This method returns ALL documents in the collection (up to limit), + * not filtered results. Filter parsing is not yet implemented for FAISS. 
+ * + * @param collectionName Collection name + * @param filter Filter expression (currently ignored - returns all documents) + * @param outputFields Fields to return in results + * @param limit Maximum number of results (only limit is enforced) + * @returns All documents with specified fields (up to limit) + */ + async query( + collectionName: string, + filter: string, + outputFields: string[], + limit?: number, + ): Promise[]> { + await this.ensureInitialized() + await this.ensureLoaded(collectionName) + + const collection = this.collections.get(collectionName) + if (!collection) { + throw new Error(`Collection ${collectionName} not found`) + } + + if (filter && filter.trim() !== '') { + console.warn(`[FaissDB] ⚠️ Query filters are not implemented. Filter '${filter}' will be ignored.`) + console.warn(`[FaissDB] ⚠️ All documents will be returned (up to limit). Consider using another vector database if filtering is required.`) + } + + console.log('[FaissDB] 🔍 Querying documents (no filter support)') + + const results: Record[] = [] + + for (const doc of collection.documents.values()) { + // For now, we'll return all documents since filter parsing is complex + const result: Record = {} + for (const field of outputFields) { + if (field === 'id') { + result.id = doc.id + } + else if (field === 'content') { + result.content = doc.content + } + else if (field === 'relativePath') { + result.relativePath = doc.relativePath + } + else if (field === 'startLine') { + result.startLine = doc.startLine + } + else if (field === 'endLine') { + result.endLine = doc.endLine + } + else if (field === 'fileExtension') { + result.fileExtension = doc.fileExtension + } + else if (doc.metadata[field] !== undefined) { + result[field] = doc.metadata[field] + } + } + results.push(result) + + if (limit && results.length >= limit) { + break + } + } + + return results + } + + /** + * Check collection limit + * FAISS has no inherent collection limit (only limited by disk space) + */ + async 
checkCollectionLimit(): Promise { + return true + } +} diff --git a/packages/core/src/vectordb/index.ts b/packages/core/src/vectordb/index.ts index dd4cc7f..b8e9513 100644 --- a/packages/core/src/vectordb/index.ts +++ b/packages/core/src/vectordb/index.ts @@ -6,10 +6,32 @@ export { VectorDatabaseFactory, VectorDatabaseType } from './factory' export type { VectorDatabaseConfig } from './factory' // Implementation class exports +export type { FaissConfig } from './faiss-vectordb' export { MilvusRestfulConfig, MilvusRestfulVectorDatabase } from './milvus-restful-vectordb' export { MilvusConfig, MilvusVectorDatabase } from './milvus-vectordb' export { QdrantConfig, QdrantVectorDatabase } from './qdrant-vectordb' + +// FAISS is conditionally exported (may not be available without native bindings) +// Use VectorDatabaseFactory to check availability: VectorDatabaseFactory.isFaissAvailable() +try { + const { FaissVectorDatabase: FaissDB } = require('./faiss-vectordb') + // Re-export if successfully loaded + module.exports.FaissVectorDatabase = FaissDB +} +catch (error: any) { + const errorMsg = error.message || String(error) + // Allow FAISS to be unavailable (bindings or module not found) + if (errorMsg.includes('Could not locate the bindings file') + || errorMsg.includes('faiss-node') + || errorMsg.includes('Cannot find module')) { + // FAISS not available, don't export it + console.warn('[vectordb/index] FAISS not available - FaissVectorDatabase not exported') + } + else { + throw error // Re-throw unexpected errors + } +} // Sparse vector exports export { BM25Config, SimpleBM25 } from './sparse/simple-bm25' export { SparseVectorGenerator } from './sparse/sparse-vector-generator' diff --git a/packages/core/src/vectordb/sparse/simple-bm25.ts b/packages/core/src/vectordb/sparse/simple-bm25.ts index 4cf7ac1..c261e26 100644 --- a/packages/core/src/vectordb/sparse/simple-bm25.ts +++ b/packages/core/src/vectordb/sparse/simple-bm25.ts @@ -299,12 +299,9 @@ export class 
SimpleBM25 implements SparseVectorGenerator { /** * Serialize the BM25 model to JSON * Exports the trained state including vocabulary, IDF scores, and avgDocLength + * Can serialize untrained models (for empty hybrid collections) */ toJSON(): string { - if (!this.trained) { - throw new Error('Cannot serialize untrained BM25 model') - } - return JSON.stringify({ k1: this.k1, b: this.b, diff --git a/packages/core/test/vectordb/factory.test.ts b/packages/core/test/vectordb/factory.test.ts index 7206f16..d90629c 100644 --- a/packages/core/test/vectordb/factory.test.ts +++ b/packages/core/test/vectordb/factory.test.ts @@ -7,6 +7,7 @@ import { VectorDatabaseFactory, VectorDatabaseType, } from '../../src/vectordb/factory' +import { FaissVectorDatabase } from '../../src/vectordb/faiss-vectordb' import { MilvusRestfulVectorDatabase } from '../../src/vectordb/milvus-restful-vectordb' import { MilvusVectorDatabase } from '../../src/vectordb/milvus-vectordb' import { QdrantVectorDatabase } from '../../src/vectordb/qdrant-vectordb' @@ -57,6 +58,27 @@ describe('vectorDatabaseFactory', () => { expect(db).toHaveProperty('hybridSearch') }) + it('should create FaissVectorDatabase with FAISS_LOCAL type', () => { + // Skip if FAISS bindings not available + if (!VectorDatabaseFactory.isFaissAvailable()) { + console.log('⏭️ Skipping FAISS test (native bindings not available)') + return + } + + const db = VectorDatabaseFactory.create( + VectorDatabaseType.FAISS_LOCAL, + { + storageDir: '/tmp/faiss-test', + }, + ) + + expect(db).toBeInstanceOf(FaissVectorDatabase) + expect(db).toHaveProperty('createCollection') + expect(db).toHaveProperty('createHybridCollection') + expect(db).toHaveProperty('search') + expect(db).toHaveProperty('hybridSearch') + }) + it('should pass correct config to MilvusVectorDatabase', () => { const config = { address: 'localhost:19530', diff --git a/packages/core/test/vectordb/faiss-vectordb.test.ts b/packages/core/test/vectordb/faiss-vectordb.test.ts new file mode 
100644 index 0000000..2de9061 --- /dev/null +++ b/packages/core/test/vectordb/faiss-vectordb.test.ts @@ -0,0 +1,262 @@ +import type { VectorDocument } from '../../src/types' +import * as os from 'node:os' +import * as path from 'node:path' +import * as fs from 'fs-extra' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { FaissVectorDatabase } from '../../src/vectordb/faiss-vectordb' + +describe('faissVectorDatabase', () => { + let faissDb: FaissVectorDatabase + let tempDir: string + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'faiss-test-')) + faissDb = new FaissVectorDatabase({ storageDir: tempDir }) + }) + + afterEach(async () => { + await fs.remove(tempDir) + }) + + describe('initialization', () => { + it('should initialize storage directory', async () => { + await (faissDb as any).initialize() + expect(await fs.pathExists(tempDir)).toBe(true) + }) + + it('should throw error with invalid storage directory permissions', async () => { + const readOnlyDb = new FaissVectorDatabase({ storageDir: '/root/faiss-test-readonly' }) + // Initialize is called in constructor, so we need to wait for it to reject + await expect((readOnlyDb as any).initializationPromise).rejects.toThrow(/Failed to initialize/) + }) + }) + + describe('createCollection', () => { + it('should create a dense-only collection', async () => { + await faissDb.createCollection('test', 128) + + expect(await faissDb.hasCollection('test')).toBe(true) + const collections = await faissDb.listCollections() + expect(collections).toContain('test') + }) + + it('should create a hybrid collection with BM25', async () => { + await faissDb.createHybridCollection('hybrid-test', 128) + + expect(await faissDb.hasCollection('hybrid-test')).toBe(true) + const collections = await faissDb.listCollections() + expect(collections).toContain('hybrid-test') + }) + + it('should throw error when creating duplicate collection', async () => { + await 
faissDb.createCollection('test', 128) + await expect(faissDb.createCollection('test', 128)).rejects.toThrow(/already exists/) + }) + }) + + describe('insert and search', () => { + const testDocs: VectorDocument[] = [ + { + id: 'doc1', + vector: Array.from({ length: 128 }).fill(0).map((_, i) => i === 0 ? 1.0 : 0.0), + content: 'First document about testing', + relativePath: 'test1.ts', + startLine: 1, + endLine: 10, + fileExtension: '.ts', + metadata: {}, + }, + { + id: 'doc2', + vector: Array.from({ length: 128 }).fill(0).map((_, i) => i === 1 ? 1.0 : 0.0), + content: 'Second document about implementation', + relativePath: 'test2.ts', + startLine: 1, + endLine: 10, + fileExtension: '.ts', + metadata: {}, + }, + ] + + it('should insert and search documents', async () => { + await faissDb.createCollection('test', 128) + await faissDb.insert('test', testDocs) + + const queryVector = Array.from({ length: 128 }).fill(0).map((_, i) => i === 0 ? 1.0 : 0.0) + const results = await faissDb.search('test', queryVector, { topK: 5 }) + + expect(results).toHaveLength(2) + expect(results[0].document.id).toBe('doc1') + expect(results[0].score).toBeGreaterThan(0) + }) + + it('should return empty array for empty collection', async () => { + await faissDb.createCollection('empty', 128) + const queryVector = Array.from({ length: 128 }).fill(0.1) + const results = await faissDb.search('empty', queryVector) + + expect(results).toEqual([]) + expect(results).toBeInstanceOf(Array) + }) + + it('should handle dimension mismatch gracefully', async () => { + await faissDb.createCollection('test', 128) + + const wrongDimDoc: VectorDocument = { + id: 'wrong', + vector: Array.from({ length: 256 }).fill(0.1), // Wrong dimension! 
+ content: 'test', + relativePath: 'test.ts', + startLine: 1, + endLine: 1, + fileExtension: '.ts', + metadata: {}, + } + + // FAISS will throw when adding wrong dimension vector + await expect(faissDb.insert('test', [wrongDimDoc])).rejects.toThrow() + }) + }) + + describe('persistence', () => { + const testDoc: VectorDocument = { + id: 'persist-test', + vector: Array.from({ length: 128 }).fill(0.1), + content: 'persistence test', + relativePath: 'test.ts', + startLine: 1, + endLine: 10, + fileExtension: '.ts', + metadata: {}, + } + + it('should persist and reload collection', async () => { + // Create and save + await faissDb.createCollection('persist', 128) + await faissDb.insert('persist', [testDoc]) + + // Force unload from memory + ;(faissDb as any).collections.delete('persist') + + // Reload + const queryVector = Array.from({ length: 128 }).fill(0.1) + const results = await faissDb.search('persist', queryVector) + + expect(results).toHaveLength(1) + expect(results[0].document.id).toBe('persist-test') + }) + + it('should handle corrupt metadata file gracefully', async () => { + await faissDb.createCollection('corrupt', 128) + await faissDb.insert('corrupt', [testDoc]) + + // Corrupt metadata file + const metadataPath = path.join(tempDir, 'corrupt', 'metadata.json') + await fs.writeFile(metadataPath, 'CORRUPTED_JSON{') + + // Force unload and reload + ;(faissDb as any).collections.delete('corrupt') + + await expect((faissDb as any).loadCollection('corrupt')) + .rejects + .toThrow(/Failed to load collection metadata/) + }) + }) + + describe('hybrid search', () => { + it('should perform hybrid search with BM25', async () => { + await faissDb.createHybridCollection('hybrid', 128) + + const docs: VectorDocument[] = [ + { + id: 'doc1', + vector: Array.from({ length: 128 }).fill(0).map((_, i) => i === 0 ? 
1.0 : 0.0), + content: 'machine learning algorithms', + relativePath: 'ml.ts', + startLine: 1, + endLine: 10, + fileExtension: '.ts', + metadata: {}, + }, + { + id: 'doc2', + vector: Array.from({ length: 128 }).fill(0).map((_, i) => i === 1 ? 1.0 : 0.0), + content: 'neural network implementation', + relativePath: 'nn.ts', + startLine: 1, + endLine: 10, + fileExtension: '.ts', + metadata: {}, + }, + ] + + await faissDb.insertHybrid('hybrid', docs) + + const results = await faissDb.hybridSearch('hybrid', [ + { anns_field: 'dense', data: Array.from({ length: 128 }).fill(0).map((_, i) => i === 0 ? 1.0 : 0.0), limit: 10 }, + { anns_field: 'sparse', data: 'machine learning', limit: 10 }, + ]) + + expect(results.length).toBeGreaterThan(0) + expect(results[0].document.content).toContain('machine') + }) + }) + + describe('delete operation', () => { + it('should throw error when attempting to delete', async () => { + await faissDb.createCollection('test', 128) + const doc: VectorDocument = { + id: 'delete-me', + vector: Array.from({ length: 128 }).fill(0.1), + content: 'test', + relativePath: 'test.ts', + startLine: 1, + endLine: 1, + fileExtension: '.ts', + metadata: {}, + } + await faissDb.insert('test', [doc]) + + await expect(faissDb.delete('test', ['delete-me'])) + .rejects + .toThrow(/FAISS does not support document deletion/) + }) + }) + + describe('query operation', () => { + it('should warn when filter is provided', async () => { + await faissDb.createCollection('test', 128) + const doc: VectorDocument = { + id: 'query-test', + vector: Array.from({ length: 128 }).fill(0.1), + content: 'test', + relativePath: 'test.ts', + startLine: 1, + endLine: 1, + fileExtension: '.ts', + metadata: {}, + } + await faissDb.insert('test', [doc]) + + // Should not throw, but should warn + const results = await faissDb.query('test', 'some_field = "value"', ['id', 'content']) + + expect(results).toHaveLength(1) + expect(results[0].id).toBe('query-test') + }) + }) + + 
describe('dropCollection', () => { + it('should remove all files when dropping collection', async () => { + await faissDb.createCollection('drop-test', 128) + const collectionPath = path.join(tempDir, 'drop-test') + + expect(await fs.pathExists(collectionPath)).toBe(true) + + await faissDb.dropCollection('drop-test') + + expect(await fs.pathExists(collectionPath)).toBe(false) + expect(await faissDb.hasCollection('drop-test')).toBe(false) + }) + }) +}) diff --git a/packages/mcp/src/config.ts b/packages/mcp/src/config.ts index 325f056..e0bed16 100644 --- a/packages/mcp/src/config.ts +++ b/packages/mcp/src/config.ts @@ -16,7 +16,7 @@ export interface ContextMcpConfig { ollamaModel?: string ollamaHost?: string // Vector database configuration - vectorDbType?: 'milvus' | 'qdrant' // Vector database type (default: milvus) + vectorDbType?: 'milvus' | 'qdrant' | 'faiss' | 'faiss-local' // Vector database type (default: faiss for local, milvus for remote) milvusAddress?: string // Optional, can be auto-resolved from token milvusToken?: string qdrantUrl?: string // Qdrant URL (e.g., http://localhost:6333 or cloud URL) diff --git a/packages/mcp/src/handlers.ts b/packages/mcp/src/handlers.ts index 22c3500..d21315d 100644 --- a/packages/mcp/src/handlers.ts +++ b/packages/mcp/src/handlers.ts @@ -520,16 +520,16 @@ export class ToolHandlers { return { content: [{ type: 'text', - text: `Error: Collection exists for '${absolutePath}' but statistics could not be retrieved from the vector database.\n\n` + - `This may indicate:\n` + - ` - Collection is not loaded or in an invalid state\n` + - ` - Vector database connectivity issues\n` + - `\n` + - `Recommended actions:\n` + - ` 1. Try searching again in a moment\n` + - ` 2. If the problem persists, re-index the codebase:\n` + - ` index_codebase(path='${absolutePath}', force=true)\n` + - ` 3. 
Check your vector database connection settings`, + text: `Error: Collection exists for '${absolutePath}' but statistics could not be retrieved from the vector database.\n\n` + + `This may indicate:\n` + + ` - Collection is not loaded or in an invalid state\n` + + ` - Vector database connectivity issues\n` + + `\n` + + `Recommended actions:\n` + + ` 1. Try searching again in a moment\n` + + ` 2. If the problem persists, re-index the codebase:\n` + + ` index_codebase(path='${absolutePath}', force=true)\n` + + ` 3. Check your vector database connection settings`, }], isError: true, } @@ -542,18 +542,18 @@ export class ToolHandlers { return { content: [{ type: 'text', - text: `Error: Failed to sync codebase '${absolutePath}' from vector database.\n\n` + - `Error: ${error instanceof Error ? error.message : String(error)}\n\n` + - `This may indicate:\n` + - ` - Network connectivity issues with the vector database\n` + - ` - Authentication or permission problems\n` + - ` - Vector database service unavailable\n` + - `\n` + - `Recommended actions:\n` + - ` 1. Check your MILVUS_ADDRESS and MILVUS_TOKEN settings\n` + - ` 2. Verify network connectivity to the database\n` + - ` 3. Check the logs above for detailed error information\n` + - ` 4. Try re-indexing: index_codebase(path='${absolutePath}', force=true)`, + text: `Error: Failed to sync codebase '${absolutePath}' from vector database.\n\n` + + `Error: ${error instanceof Error ? error.message : String(error)}\n\n` + + `This may indicate:\n` + + ` - Network connectivity issues with the vector database\n` + + ` - Authentication or permission problems\n` + + ` - Vector database service unavailable\n` + + `\n` + + `Recommended actions:\n` + + ` 1. Check your MILVUS_ADDRESS and MILVUS_TOKEN settings\n` + + ` 2. Verify network connectivity to the database\n` + + ` 3. Check the logs above for detailed error information\n` + + ` 4. 
Try re-indexing: index_codebase(path='${absolutePath}', force=true)`, }], isError: true, } diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 44b2af8..9aaf08e 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -10,7 +10,7 @@ import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js' -import { Context, MilvusVectorDatabase, QdrantVectorDatabase, VectorDatabase } from '@pleaseai/context-please-core' +import { Context, FaissVectorDatabase, MilvusVectorDatabase, QdrantVectorDatabase, VectorDatabase } from '@pleaseai/context-please-core' // Import our modular components import { ContextMcpConfig, createMcpConfig, logConfigurationSummary, showHelpMessage } from './config.js' @@ -59,11 +59,25 @@ class ContextMcpServer { logEmbeddingProviderInfo(config, embedding) // Initialize vector database based on configuration - console.log(`[VECTORDB] Initializing vector database: ${config.vectorDbType || 'milvus'}`) - + // Auto-select FAISS if no external database is configured let vectorDatabase: VectorDatabase - if (config.vectorDbType === 'qdrant') { + const hasExternalDb = config.milvusAddress || config.milvusToken || config.qdrantUrl + + if (!hasExternalDb && !config.vectorDbType) { + // Default to FAISS for zero-config local development + console.log('[VECTORDB] No external vector database configured, using FAISS (local file-based)') + vectorDatabase = new FaissVectorDatabase({ + storageDir: process.env.FAISS_STORAGE_DIR, + }) + } + else if (config.vectorDbType === 'faiss' || config.vectorDbType === 'faiss-local') { + console.log('[VECTORDB] Using FAISS (local file-based)') + vectorDatabase = new FaissVectorDatabase({ + storageDir: process.env.FAISS_STORAGE_DIR, + }) + } + else if (config.vectorDbType === 'qdrant') { // Parse Qdrant URL to get address for gRPC const qdrantUrl = config.qdrantUrl || 'http://localhost:6333' const url = new URL(qdrantUrl.startsWith('http') ? 
qdrantUrl : `http://${qdrantUrl}`) @@ -86,6 +100,7 @@ class ContextMcpServer { } else { // Default to Milvus + console.log(`[VECTORDB] Using Milvus: ${config.milvusAddress || 'default'}`) vectorDatabase = new MilvusVectorDatabase({ address: config.milvusAddress, ...(config.milvusToken && { token: config.milvusToken }),