From 09ec2db03a116e4ca195f92e5b894bb4d0a8b28e Mon Sep 17 00:00:00 2001 From: spumer Date: Sat, 20 Sep 2025 22:05:39 +0500 Subject: [PATCH 1/4] add llamacpp embedding provider support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add LlamaCppEmbedding class with OpenAI-compatible API - Support for local llama.cpp servers with nomic-embed-code model - Automatic code prefix for improved code search quality - Configurable timeout and dimension auto-detection - Integration with MCP configuration system - Environment variables: LLAMACPP_HOST, LLAMACPP_MODEL, LLAMACPP_TIMEOUT, LLAMACPP_CODE_PREFIX 🤖 Generated with Claude Code Co-Authored-By: Claude --- packages/core/src/embedding/index.ts | 3 +- .../core/src/embedding/llamacpp-embedding.ts | 460 ++++++++++++++++++ packages/mcp/src/config.ts | 49 +- packages/mcp/src/embedding.ts | 26 +- 4 files changed, 529 insertions(+), 9 deletions(-) create mode 100644 packages/core/src/embedding/llamacpp-embedding.ts diff --git a/packages/core/src/embedding/index.ts b/packages/core/src/embedding/index.ts index e6110941..3db35c1e 100644 --- a/packages/core/src/embedding/index.ts +++ b/packages/core/src/embedding/index.ts @@ -5,4 +5,5 @@ export * from './base-embedding'; export * from './openai-embedding'; export * from './voyageai-embedding'; export * from './ollama-embedding'; -export * from './gemini-embedding'; \ No newline at end of file +export * from './gemini-embedding'; +export * from './llamacpp-embedding'; \ No newline at end of file diff --git a/packages/core/src/embedding/llamacpp-embedding.ts b/packages/core/src/embedding/llamacpp-embedding.ts new file mode 100644 index 00000000..98f557e6 --- /dev/null +++ b/packages/core/src/embedding/llamacpp-embedding.ts @@ -0,0 +1,460 @@ +import { Embedding, EmbeddingVector } from './base-embedding'; + +export interface LlamaCppEmbeddingConfig { + host?: string; + model?: string; + codePrefix?: boolean; // Enable automatic code prefix + dimension?: number; // Optional dimension parameter + maxTokens?: number; // Optional max tokens parameter + timeout?: number; // Request timeout in milliseconds +} + +export class LlamaCppConfigurationError extends Error { + constructor(message: string) { + super(`LlamaCpp configuration error: ${message}`); + this.name = 'LlamaCppConfigurationError'; + } +} + +export class LlamaCppNetworkError extends Error { + constructor(message: string, public readonly originalError?: Error) { + super(`LlamaCpp network error: ${message}`); + this.name = 'LlamaCppNetworkError'; + } +} + +export class LlamaCppEmbedding extends Embedding { + private config: LlamaCppEmbeddingConfig; + private dimension: number = 768; // Default dimension + private dimensionDetected: boolean = false; + protected maxTokens: number = 8192; // Default for code models like nomic-embed-code + private host: string; + private codePrefix: string = "Represent this query for searching relevant code:"; + + constructor(config: LlamaCppEmbeddingConfig) { + super(); + + this.validateConfig(config); + + this.config = config; + this.host = this.normalizeHost(config.host || 'http://localhost:8080'); + + // Set dimension if provided + if (config.dimension) { + if (config.dimension <= 0) { + throw new LlamaCppConfigurationError('Dimension must be a positive number'); + } + this.dimension = config.dimension; + this.dimensionDetected = true; + } + + // Set max tokens if provided + if (config.maxTokens) { + if (config.maxTokens <= 0) { + throw new LlamaCppConfigurationError('Max tokens 
must be a positive number'); + } + this.maxTokens = config.maxTokens; + } + + // Enable code prefix by default for llamacpp (designed for code) + if (config.codePrefix === undefined) { + this.config.codePrefix = true; + } + } + + private validateConfig(config: LlamaCppEmbeddingConfig): void { + if (!config) { + throw new LlamaCppConfigurationError('Configuration object is required'); + } + + if (config.host !== undefined && typeof config.host !== 'string') { + throw new LlamaCppConfigurationError('Host must be a string'); + } + + if (config.model !== undefined && typeof config.model !== 'string') { + throw new LlamaCppConfigurationError('Model must be a string'); + } + + if (config.timeout !== undefined && (typeof config.timeout !== 'number' || config.timeout <= 0)) { + throw new LlamaCppConfigurationError('Timeout must be a positive number'); + } + + if (config.host) { + this.validateHostUrl(config.host); + } + } + + private validateHostUrl(host: string): void { + try { + const url = new URL(host); + if (!['http:', 'https:'].includes(url.protocol)) { + throw new LlamaCppConfigurationError(`Unsupported protocol: ${url.protocol}. Only HTTP and HTTPS are supported`); + } + } catch (error) { + if (error instanceof LlamaCppConfigurationError) { + throw error; + } + throw new LlamaCppConfigurationError(`Invalid host URL: ${host}`); + } + } + + private normalizeHost(host: string): string { + // Remove trailing slash for consistency + return host.replace(/\/$/, ''); + } + + private async makeRequest(url: string, body: any): Promise { + const timeout = this.config.timeout || 30000; // 30s default timeout + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeout); + + try { + console.log(`[LlamaCppEmbedding] Making request to ${url} with timeout ${timeout}ms`); + + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(body), + signal: controller.signal, + }); + + clearTimeout(timeoutId); + + if (!response.ok) { + let errorDetails: string; + try { + errorDetails = await response.text(); + } catch { + errorDetails = 'Unable to read error response'; + } + + const errorMessage = `HTTP ${response.status} (${response.statusText}): ${errorDetails}`; + console.error(`[LlamaCppEmbedding] Request failed: ${errorMessage}`); + throw new LlamaCppNetworkError(errorMessage); + } + + let responseData: any; + try { + responseData = await response.json(); + } catch (parseError) { + const errorMessage = 'Invalid JSON response from server'; + console.error(`[LlamaCppEmbedding] ${errorMessage}:`, parseError); + throw new LlamaCppNetworkError(errorMessage, parseError instanceof Error ? parseError : undefined); + } + + return responseData; + } catch (error) { + clearTimeout(timeoutId); + + // Re-throw specific errors without wrapping + if (error instanceof LlamaCppNetworkError || error instanceof LlamaCppConfigurationError) { + throw error; + } + + if (error instanceof Error) { + if (error.name === 'AbortError') { + const timeoutError = `Request timeout after ${timeout}ms - server at ${this.host} not responding`; + console.error(`[LlamaCppEmbedding] ${timeoutError}`); + throw new LlamaCppNetworkError(timeoutError, error); + } + + if (error.name === 'TypeError' && error.message.includes('fetch')) { + const connectionError = `Unable to connect to llama.cpp server at ${this.host}. 
Please ensure the server is running and accessible.`; + console.error(`[LlamaCppEmbedding] ${connectionError}`); + throw new LlamaCppNetworkError(connectionError, error); + } + + // Log original error with full stack trace + console.error(`[LlamaCppEmbedding] Unexpected error during request:`, error); + throw new LlamaCppNetworkError(`Unexpected error: ${error.message}`, error); + } + + // Fallback for non-Error objects + const unknownError = 'Unknown error occurred during request'; + console.error(`[LlamaCppEmbedding] ${unknownError}:`, error); + throw new LlamaCppNetworkError(unknownError); + } + } + + private preprocessTextForCode(text: string): string { + if (typeof text !== 'string') { + throw new LlamaCppConfigurationError('Text must be a string'); + } + + const processedText = this.preprocessText(text); + + // Add code prefix if enabled and not already present + if (this.config.codePrefix && !processedText.startsWith(this.codePrefix)) { + return `${this.codePrefix} ${processedText}`; + } + + return processedText; + } + + async embed(text: string): Promise { + // Preprocess the text with optional code prefix + const processedText = this.preprocessTextForCode(text); + + // Ensure dimension is detected + await this.ensureDimensionDetected(); + + const requestBody = { + input: processedText, + model: this.config.model || 'embedding-model', + }; + + const url = `${this.host}/v1/embeddings`; + const response = await this.makeRequest(url, requestBody); + + if (!response || typeof response !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: expected object'); + } + + if (!response.data || !Array.isArray(response.data)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid data array'); + } + + if (response.data.length === 0) { + throw new LlamaCppNetworkError('Invalid response format: empty data array'); + } + + const firstItem = response.data[0]; + if (!firstItem || typeof firstItem !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: invalid first data item'); + } + + if (!firstItem.embedding || !Array.isArray(firstItem.embedding)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid embedding array'); + } + + const embedding = response.data[0].embedding; + + return { + vector: embedding, + dimension: this.dimension + }; + } + + async embedBatch(texts: string[]): Promise { + if (!Array.isArray(texts)) { + throw new LlamaCppConfigurationError('Texts must be an array'); + } + + if (texts.length === 0) { + throw new LlamaCppConfigurationError('Texts array cannot be empty'); + } + + // Preprocess all texts with optional code prefix + const processedTexts = texts.map(text => this.preprocessTextForCode(text)); + + // Ensure dimension is detected + await this.ensureDimensionDetected(); + + const requestBody = { + input: processedTexts, + model: this.config.model || 'embedding-model', + }; + + const url = `${this.host}/v1/embeddings`; + const response = await this.makeRequest(url, requestBody); + + if (!response || typeof response !== 'object') { + throw new LlamaCppNetworkError('Invalid batch response format: expected object'); + } + + if (!response.data || !Array.isArray(response.data)) { + throw new LlamaCppNetworkError('Invalid batch response format: missing or invalid data array'); + } + + if (response.data.length === 0) { + throw new LlamaCppNetworkError('Invalid batch response format: empty data array'); + } + + return response.data.map((item: any, index: number) => { + if (!item || typeof 
item !== 'object') { + throw new LlamaCppNetworkError(`Invalid batch response format: invalid item at index ${index}`); + } + + if (!item.embedding || !Array.isArray(item.embedding)) { + throw new LlamaCppNetworkError(`Invalid batch response format: missing or invalid embedding at index ${index}`); + } + + return { + vector: item.embedding, + dimension: this.dimension + }; + }); + } + + getDimension(): number { + return this.dimension; + } + + getProvider(): string { + return 'LlamaCpp'; + } + + /** + * Ensure dimension is detected before making embedding requests + */ + private async ensureDimensionDetected(): Promise { + if (!this.dimensionDetected && !this.config.dimension) { + this.dimension = await this.detectDimension(); + this.dimensionDetected = true; + console.log(`[LlamaCppEmbedding] 📏 Detected embedding dimension: ${this.dimension} for model: ${this.config.model || 'unknown'}`); + } + } + + async detectDimension(testText: string = "test"): Promise { + console.log(`[LlamaCppEmbedding] Detecting embedding dimension...`); + + if (typeof testText !== 'string') { + throw new LlamaCppConfigurationError('Test text must be a string'); + } + + try { + // Use raw test text without code prefix for dimension detection + const processedText = this.preprocessText(testText); + + const requestBody = { + input: processedText, + model: this.config.model || 'embedding-model', + }; + + const url = `${this.host}/v1/embeddings`; + const response = await this.makeRequest(url, requestBody); + + if (!response || typeof response !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: expected object'); + } + + if (!response.data || !Array.isArray(response.data)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid data array'); + } + + if (response.data.length === 0) { + throw new LlamaCppNetworkError('Invalid response format: empty data array'); + } + + const firstItem = response.data[0]; + if (!firstItem || typeof firstItem !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: invalid first data item'); + } + + if (!firstItem.embedding || !Array.isArray(firstItem.embedding)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid embedding array'); + } + + const dimension = firstItem.embedding.length; + if (dimension <= 0) { + throw new LlamaCppNetworkError(`Invalid embedding dimension: ${dimension}`); + } + + console.log(`[LlamaCppEmbedding] Successfully detected embedding dimension: ${dimension}`); + return dimension; + } catch (error) { + if (error instanceof LlamaCppNetworkError || error instanceof LlamaCppConfigurationError) { + throw error; + } + + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + console.error(`[LlamaCppEmbedding] Failed to detect dimension:`, error); + throw new LlamaCppNetworkError(`Failed to detect embedding dimension: ${errorMessage}`, error instanceof Error ? 
error : undefined); + } + } + + /** + * Set the host URL for llama.cpp server + * @param host Host URL (e.g., 'http://localhost:8080') + */ + setHost(host: string): void { + if (typeof host !== 'string') { + throw new LlamaCppConfigurationError('Host must be a string'); + } + + this.validateHostUrl(host); + + this.host = this.normalizeHost(host); + this.config.host = host; + } + + /** + * Set the model name + * @param model Model name + */ + async setModel(model: string): Promise { + if (typeof model !== 'string') { + throw new LlamaCppConfigurationError('Model must be a string'); + } + + if (model.trim() === '') { + throw new LlamaCppConfigurationError('Model name cannot be empty'); + } + + this.config.model = model; + // Reset dimension detection when model changes + this.dimensionDetected = false; + if (!this.config.dimension) { + await this.ensureDimensionDetected(); + } + } + + /** + * Enable or disable automatic code prefix + * @param enabled Whether to enable code prefix + */ + setCodePrefix(enabled: boolean): void { + if (typeof enabled !== 'boolean') { + throw new LlamaCppConfigurationError('Code prefix enabled flag must be a boolean'); + } + + this.config.codePrefix = enabled; + } + + /** + * Set custom code prefix + * @param prefix Custom prefix text + */ + setCustomCodePrefix(prefix: string): void { + if (typeof prefix !== 'string') { + throw new LlamaCppConfigurationError('Code prefix must be a string'); + } + + if (prefix.trim() === '') { + throw new LlamaCppConfigurationError('Code prefix cannot be empty'); + } + + this.codePrefix = prefix; + this.config.codePrefix = true; + } + + /** + * Set request timeout + * @param timeout Timeout in milliseconds + */ + setTimeout(timeout: number): void { + if (typeof timeout !== 'number') { + throw new LlamaCppConfigurationError('Timeout must be a number'); + } + + if (timeout <= 0) { + throw new LlamaCppConfigurationError('Timeout must be a positive number'); + } + + if (timeout > 600000) { // 10 minutes max + throw new LlamaCppConfigurationError('Timeout cannot exceed 600000ms (10 minutes)'); + } + + this.config.timeout = timeout; + } + + /** + * Get current configuration + */ + getConfig(): LlamaCppEmbeddingConfig { + return { ...this.config }; + } +} \ No newline at end of file diff --git a/packages/mcp/src/config.ts b/packages/mcp/src/config.ts index 428f9474..b2615512 100644 --- a/packages/mcp/src/config.ts +++ b/packages/mcp/src/config.ts @@ -4,7 +4,7 @@ export interface ContextMcpConfig { name: string; version: string; // Embedding provider configuration - embeddingProvider: 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama'; + embeddingProvider: 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama' | 'LlamaCpp'; embeddingModel: string; // Provider-specific API keys openaiApiKey?: string; @@ -15,6 +15,11 @@ export interface ContextMcpConfig { // Ollama configuration ollamaModel?: string; ollamaHost?: string; + // LlamaCpp configuration + llamacppHost?: string; + llamacppModel?: string; + llamacppTimeout?: number; + llamacppCodePrefix?: boolean; // Vector database configuration milvusAddress?: string; // Optional, can be auto-resolved from token milvusToken?: string; @@ -78,6 +83,8 @@ export function getDefaultModelForProvider(provider: string): string { return 'gemini-embedding-001'; case 'Ollama': return 'nomic-embed-text'; + case 'LlamaCpp': + return 'nomic-embed-code'; default: return 'text-embedding-3-small'; } @@ -91,6 +98,11 @@ export function getEmbeddingModelForProvider(provider: string): string { const ollamaModel = 
envManager.get('OLLAMA_MODEL') || envManager.get('EMBEDDING_MODEL') || getDefaultModelForProvider(provider); console.log(`[DEBUG] 🎯 Ollama model selection: OLLAMA_MODEL=${envManager.get('OLLAMA_MODEL') || 'NOT SET'}, EMBEDDING_MODEL=${envManager.get('EMBEDDING_MODEL') || 'NOT SET'}, selected=${ollamaModel}`); return ollamaModel; + case 'LlamaCpp': + // For LlamaCpp, prioritize LLAMACPP_MODEL over EMBEDDING_MODEL + const llamacppModel = envManager.get('LLAMACPP_MODEL') || envManager.get('EMBEDDING_MODEL') || getDefaultModelForProvider(provider); + console.log(`[DEBUG] 🎯 LlamaCpp model selection: LLAMACPP_MODEL=${envManager.get('LLAMACPP_MODEL') || 'NOT SET'}, EMBEDDING_MODEL=${envManager.get('EMBEDDING_MODEL') || 'NOT SET'}, selected=${llamacppModel}`); + return llamacppModel; case 'OpenAI': case 'VoyageAI': case 'Gemini': @@ -108,6 +120,8 @@ export function createMcpConfig(): ContextMcpConfig { console.log(`[DEBUG] EMBEDDING_PROVIDER: ${envManager.get('EMBEDDING_PROVIDER') || 'NOT SET'}`); console.log(`[DEBUG] EMBEDDING_MODEL: ${envManager.get('EMBEDDING_MODEL') || 'NOT SET'}`); console.log(`[DEBUG] OLLAMA_MODEL: ${envManager.get('OLLAMA_MODEL') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_MODEL: ${envManager.get('LLAMACPP_MODEL') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_HOST: ${envManager.get('LLAMACPP_HOST') || 'NOT SET'}`); console.log(`[DEBUG] GEMINI_API_KEY: ${envManager.get('GEMINI_API_KEY') ? 'SET (length: ' + envManager.get('GEMINI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] OPENAI_API_KEY: ${envManager.get('OPENAI_API_KEY') ? 'SET (length: ' + envManager.get('OPENAI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] MILVUS_ADDRESS: ${envManager.get('MILVUS_ADDRESS') || 'NOT SET'}`); @@ -117,7 +131,7 @@ export function createMcpConfig(): ContextMcpConfig { name: envManager.get('MCP_SERVER_NAME') || "Context MCP Server", version: envManager.get('MCP_SERVER_VERSION') || "1.0.0", // Embedding provider configuration - embeddingProvider: (envManager.get('EMBEDDING_PROVIDER') as 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama') || 'OpenAI', + embeddingProvider: (envManager.get('EMBEDDING_PROVIDER') as 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama' | 'LlamaCpp') || 'OpenAI', embeddingModel: getEmbeddingModelForProvider(envManager.get('EMBEDDING_PROVIDER') || 'OpenAI'), // Provider-specific API keys openaiApiKey: envManager.get('OPENAI_API_KEY'), @@ -128,6 +142,11 @@ export function createMcpConfig(): ContextMcpConfig { // Ollama configuration ollamaModel: envManager.get('OLLAMA_MODEL'), ollamaHost: envManager.get('OLLAMA_HOST'), + // LlamaCpp configuration + llamacppHost: envManager.get('LLAMACPP_HOST'), + llamacppModel: envManager.get('LLAMACPP_MODEL'), + llamacppTimeout: envManager.get('LLAMACPP_TIMEOUT') ? parseInt(envManager.get('LLAMACPP_TIMEOUT')!, 10) : undefined, + llamacppCodePrefix: envManager.get('LLAMACPP_CODE_PREFIX') ? 
envManager.get('LLAMACPP_CODE_PREFIX') === 'true' : undefined, // Vector database configuration - address can be auto-resolved from token milvusAddress: envManager.get('MILVUS_ADDRESS'), // Optional, can be resolved from token milvusToken: envManager.get('MILVUS_TOKEN') @@ -166,6 +185,14 @@ export function logConfigurationSummary(config: ContextMcpConfig): void { console.log(`[MCP] Ollama Host: ${config.ollamaHost || 'http://127.0.0.1:11434'}`); console.log(`[MCP] Ollama Model: ${config.embeddingModel}`); break; + case 'LlamaCpp': + console.log(`[MCP] LlamaCpp Host: ${config.llamacppHost || 'http://localhost:8080'}`); + console.log(`[MCP] LlamaCpp Model: ${config.embeddingModel}`); + if (config.llamacppTimeout) { + console.log(`[MCP] LlamaCpp Timeout: ${config.llamacppTimeout}ms`); + } + console.log(`[MCP] LlamaCpp Code Prefix: ${config.llamacppCodePrefix !== false ? '✅ Enabled' : '❌ Disabled'}`); + break; } console.log(`[MCP] 🔧 Initializing server components...`); @@ -185,19 +212,25 @@ Environment Variables: MCP_SERVER_VERSION Server version Embedding Provider Configuration: - EMBEDDING_PROVIDER Embedding provider: OpenAI, VoyageAI, Gemini, Ollama (default: OpenAI) + EMBEDDING_PROVIDER Embedding provider: OpenAI, VoyageAI, Gemini, Ollama, LlamaCpp (default: OpenAI) EMBEDDING_MODEL Embedding model name (works for all providers) - + Provider-specific API Keys: OPENAI_API_KEY OpenAI API key (required for OpenAI provider) OPENAI_BASE_URL OpenAI API base URL (optional, for custom endpoints) VOYAGEAI_API_KEY VoyageAI API key (required for VoyageAI provider) GEMINI_API_KEY Google AI API key (required for Gemini provider) GEMINI_BASE_URL Gemini API base URL (optional, for custom endpoints) - + Ollama Configuration: OLLAMA_HOST Ollama server host (default: http://127.0.0.1:11434) OLLAMA_MODEL Ollama model name (alternative to EMBEDDING_MODEL for Ollama) + + LlamaCpp Configuration: + LLAMACPP_HOST LlamaCpp server host (default: http://localhost:8080) + LLAMACPP_MODEL LlamaCpp model name (alternative to EMBEDDING_MODEL for LlamaCpp) + LLAMACPP_TIMEOUT Request timeout in milliseconds (default: 30000) + LLAMACPP_CODE_PREFIX Enable automatic code prefix for embeddings (default: true) Vector Database Configuration: MILVUS_ADDRESS Milvus address (optional, can be auto-resolved from token) @@ -221,5 +254,11 @@ Examples: # Start MCP server with Ollama and specific model (using EMBEDDING_MODEL) EMBEDDING_PROVIDER=Ollama EMBEDDING_MODEL=nomic-embed-text MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with LlamaCpp and nomic-embed-code model + EMBEDDING_PROVIDER=LlamaCpp LLAMACPP_HOST=http://localhost:8080 LLAMACPP_MODEL=nomic-embed-code MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with LlamaCpp and custom timeout (useful for slower hardware) + EMBEDDING_PROVIDER=LlamaCpp LLAMACPP_TIMEOUT=60000 EMBEDDING_MODEL=nomic-embed-code MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest `); } \ No newline at end of file diff --git a/packages/mcp/src/embedding.ts b/packages/mcp/src/embedding.ts index 6ebd71a0..e113326d 100644 --- a/packages/mcp/src/embedding.ts +++ b/packages/mcp/src/embedding.ts @@ -1,8 +1,8 @@ -import { OpenAIEmbedding, VoyageAIEmbedding, GeminiEmbedding, OllamaEmbedding } from "@zilliz/claude-context-core"; +import { OpenAIEmbedding, VoyageAIEmbedding, GeminiEmbedding, OllamaEmbedding, LlamaCppEmbedding } from "@zilliz/claude-context-core"; import { ContextMcpConfig } from "./config.js"; // Helper function to 
create embedding instance based on provider -export function createEmbeddingInstance(config: ContextMcpConfig): OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding { +export function createEmbeddingInstance(config: ContextMcpConfig): OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding | LlamaCppEmbedding { console.log(`[EMBEDDING] Creating ${config.embeddingProvider} embedding instance...`); switch (config.embeddingProvider) { @@ -57,13 +57,29 @@ export function createEmbeddingInstance(config: ContextMcpConfig): OpenAIEmbeddi console.log(`[EMBEDDING] ✅ Ollama embedding instance created successfully`); return ollamaEmbedding; + case 'LlamaCpp': + const llamacppHost = config.llamacppHost || 'http://localhost:8080'; + console.log(`[EMBEDDING] 🔧 Configuring LlamaCpp with model: ${config.embeddingModel}, host: ${llamacppHost}`); + + const llamacppEmbeddingConfig = { + host: llamacppHost, + model: config.embeddingModel, + ...(config.llamacppTimeout !== undefined && { timeout: config.llamacppTimeout }), + ...(config.llamacppCodePrefix !== undefined && { codePrefix: config.llamacppCodePrefix }) + }; + + const llamacppEmbedding = new LlamaCppEmbedding(llamacppEmbeddingConfig); + console.log(`[EMBEDDING] ✅ LlamaCpp embedding instance created successfully`); + console.log(`[EMBEDDING] 📝 LlamaCpp configuration: timeout=${config.llamacppTimeout || 30000}ms, codePrefix=${config.llamacppCodePrefix !== false}`); + return llamacppEmbedding; + default: console.error(`[EMBEDDING] ❌ Unsupported embedding provider: ${config.embeddingProvider}`); throw new Error(`Unsupported embedding provider: ${config.embeddingProvider}`); } } -export function logEmbeddingProviderInfo(config: ContextMcpConfig, embedding: OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding): void { +export function logEmbeddingProviderInfo(config: ContextMcpConfig, embedding: OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding | LlamaCppEmbedding): void { console.log(`[EMBEDDING] ✅ Successfully initialized ${config.embeddingProvider} embedding provider`); console.log(`[EMBEDDING] Provider details - Model: ${config.embeddingModel}, Dimension: ${embedding.getDimension()}`); @@ -81,5 +97,9 @@ export function logEmbeddingProviderInfo(config: ContextMcpConfig, embedding: Op case 'Ollama': console.log(`[EMBEDDING] Ollama configuration - Host: ${config.ollamaHost || 'http://127.0.0.1:11434'}, Model: ${config.embeddingModel}`); break; + case 'LlamaCpp': + console.log(`[EMBEDDING] LlamaCpp configuration - Host: ${config.llamacppHost || 'http://localhost:8080'}, Model: ${config.embeddingModel}`); + console.log(`[EMBEDDING] LlamaCpp advanced - Timeout: ${config.llamacppTimeout || 30000}ms, Code Prefix: ${config.llamacppCodePrefix !== false ? 
'Enabled' : 'Disabled'}`); + break; } } \ No newline at end of file From f4adaf73ff340c4295cf513667c03b054f2989ec Mon Sep 17 00:00:00 2001 From: spumer Date: Sat, 20 Sep 2025 22:13:08 +0500 Subject: [PATCH 2/4] docs: add llamacpp provider documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add LlamaCpp to supported embedding providers in main README - Update environment variables documentation with LlamaCpp options - Add comprehensive LlamaCpp configuration guide to MCP README - Include setup instructions for local inference on consumer hardware - Add configuration examples for various MCP clients - Document LlamaCpp's goal: enable large model inference on Apple Silicon and desktop GPUs 🤖 Generated with Claude Code Co-Authored-By: Claude --- README.md | 2 +- docs/getting-started/environment-variables.md | 10 ++- packages/mcp/README.md | 68 ++++++++++++++++++- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ebacef1d..24653c4e 100644 --- a/README.md +++ b/README.md @@ -543,7 +543,7 @@ Claude Context is a monorepo containing three main packages: ### Supported Technologies -- **Embedding Providers**: [OpenAI](https://openai.com), [VoyageAI](https://voyageai.com), [Ollama](https://ollama.ai), [Gemini](https://gemini.google.com) +- **Embedding Providers**: [OpenAI](https://openai.com), [VoyageAI](https://voyageai.com), [Ollama](https://ollama.ai), [Gemini](https://gemini.google.com), [LlamaCpp](https://github.com/ggerganov/llama.cpp) (local inference on consumer hardware) - **Vector Databases**: [Milvus](https://milvus.io) or [Zilliz Cloud](https://zilliz.com/cloud)(fully managed vector database as a service) - **Code Splitters**: AST-based splitter (with automatic fallback), LangChain character-based splitter - **Languages**: TypeScript, JavaScript, Python, Java, C++, C#, Go, Rust, PHP, Ruby, Swift, Kotlin, Scala, Markdown diff --git a/docs/getting-started/environment-variables.md b/docs/getting-started/environment-variables.md index d2b813df..4ddde5c3 100644 --- a/docs/getting-started/environment-variables.md +++ b/docs/getting-started/environment-variables.md @@ -20,7 +20,7 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp ### Embedding Provider | Variable | Description | Default | |----------|-------------|---------| -| `EMBEDDING_PROVIDER` | Provider: `OpenAI`, `VoyageAI`, `Gemini`, `Ollama` | `OpenAI` | +| `EMBEDDING_PROVIDER` | Provider: `OpenAI`, `VoyageAI`, `Gemini`, `Ollama`, `LlamaCpp` | `OpenAI` | | `EMBEDDING_MODEL` | Embedding model name (works for all providers) | Provider-specific default | | `OPENAI_API_KEY` | OpenAI API key | Required for OpenAI | | `OPENAI_BASE_URL` | OpenAI API base URL (optional, for custom endpoints) | `https://api.openai.com/v1` | @@ -54,6 +54,14 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp | `OLLAMA_HOST` | Ollama server URL | `http://127.0.0.1:11434` | | `OLLAMA_MODEL`(alternative to `EMBEDDING_MODEL`) | Model name | | +### LlamaCpp (Optional) +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMACPP_HOST` | LlamaCpp server URL | `http://localhost:8080` | +| `LLAMACPP_MODEL` (alternative to `EMBEDDING_MODEL`) | Model name | | +| `LLAMACPP_TIMEOUT` | Request timeout in milliseconds | `30000` | +| `LLAMACPP_CODE_PREFIX` | Enable automatic code prefix for embeddings | `true` | + ### Advanced Configuration | Variable | Description | Default | diff --git 
a/packages/mcp/README.md b/packages/mcp/README.md index 4a562af5..45ed03f4 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -20,7 +20,7 @@ Model Context Protocol (MCP) allows you to integrate Claude Context with your fa Before using the MCP server, make sure you have: -- API key for your chosen embedding provider (OpenAI, VoyageAI, Gemini, or Ollama setup) +- API key for your chosen embedding provider (OpenAI, VoyageAI, Gemini, Ollama, or LlamaCpp setup) - Milvus vector database (local or cloud) > 💡 **Setup Help:** See the [main project setup guide](../../README.md#-quick-start) for detailed installation instructions. @@ -34,7 +34,7 @@ Claude Context MCP supports multiple embedding providers. Choose the one that be > 📋 **Quick Reference**: For a complete list of environment variables and their descriptions, see the [Environment Variables Guide](../../docs/getting-started/environment-variables.md). ```bash -# Supported providers: OpenAI, VoyageAI, Gemini, Ollama +# Supported providers: OpenAI, VoyageAI, Gemini, Ollama, LlamaCpp EMBEDDING_PROVIDER=OpenAI ``` @@ -149,6 +149,51 @@ OLLAMA_HOST=http://127.0.0.1:11434 +
+5. LlamaCpp Configuration (Local/Self-hosted) + +LlamaCpp enables running large language models locally on consumer hardware including Apple Silicon, desktop GPUs, and CPU-only systems with state-of-the-art performance. It allows you to run embeddings locally with GGUF models without sending data to external services. + +```bash +# Required: Specify which LlamaCpp model to use +EMBEDDING_MODEL=nomic-embed-code + +# Optional: Specify LlamaCpp host (default: http://localhost:8080) +LLAMACPP_HOST=http://localhost:8080 + +# Optional: Request timeout in milliseconds (default: 30000) +LLAMACPP_TIMEOUT=30000 + +# Optional: Enable code prefix for better code search (default: true) +LLAMACPP_CODE_PREFIX=true +``` + +**Setup Instructions:** + +1. Install llama.cpp from [GitHub](https://github.com/ggerganov/llama.cpp) +2. Download the GGUF embedding model (e.g., nomic-embed-code): + + ```bash + # Example: Download nomic-embed-code model + curl -L "https://huggingface.co/nomic-ai/nomic-embed-code-GGUF/resolve/main/nomic-embed-code.Q4_1.gguf" -o nomic-embed-code.Q4_1.gguf + ``` + +3. Start the llama.cpp server with embeddings: + + ```bash + llama-server -m nomic-embed-code.Q4_1.gguf --embeddings --pooling last + ``` + +4. Verify the server is running: + + ```bash + curl http://localhost:8080/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{"model": "nomic-embed-code", "input": "test"}' + ``` + +
+ #### Get a free vector database on Zilliz Cloud Claude Context needs a vector database. You can [sign up](https://cloud.zilliz.com/signup?utm_source=github&utm_medium=referral&utm_campaign=2507-codecontext-readme) on Zilliz Cloud to get an API key. @@ -354,6 +399,25 @@ Pasting the following configuration into your Cursor `~/.cursor/mcp.json` file i } ``` +**LlamaCpp Configuration:** + +```json +{ + "mcpServers": { + "claude-context": { + "command": "npx", + "args": ["-y", "@zilliz/claude-context-mcp@latest"], + "env": { + "EMBEDDING_PROVIDER": "LlamaCpp", + "EMBEDDING_MODEL": "nomic-embed-code", + "LLAMACPP_HOST": "http://localhost:8080", + "MILVUS_TOKEN": "your-zilliz-cloud-api-key" + } + } + } +} +``` +
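For readers evaluating this series, here is a minimal usage sketch of the provider added in PATCH 1. It assumes the `LlamaCppEmbedding` export from `@zilliz/claude-context-core` as introduced above; the host, model, timeout, and sample inputs are placeholders rather than project defaults beyond what the patch documents.

```typescript
import { LlamaCppEmbedding } from '@zilliz/claude-context-core';

// Point the provider at a local llama.cpp server started with `--embeddings`.
const embedding = new LlamaCppEmbedding({
    host: 'http://localhost:8080',   // LLAMACPP_HOST
    model: 'nomic-embed-code',       // LLAMACPP_MODEL
    timeout: 60000,                  // generous timeout for slower hardware (ms)
    codePrefix: true,                // prepend the code-search prefix to inputs
});

async function main() {
    // Single input: the embedding dimension is auto-detected on the first request.
    const result = await embedding.embed('function that parses JSON safely');
    console.log(embedding.getProvider(), embedding.getDimension(), result.vector.length);

    // Batch input: sent as one POST to /v1/embeddings with an array payload.
    const batch = await embedding.embedBatch(['const a = 1;', 'def add(a, b): return a + b']);
    console.log(`embedded ${batch.length} chunks`);
}

main().catch(console.error);
```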
From 895d7be711df8bdc11f7b37802f0fa06ecb46f3d Mon Sep 17 00:00:00 2001 From: spumer Date: Tue, 7 Oct 2025 07:31:30 +0500 Subject: [PATCH 3/4] feat: add strict collection naming with provider and model isolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add configurable collection naming strategy to prevent conflicts between different embedding providers and models. This ensures complete isolation when switching between providers like Ollama and LlamaCpp. ## Core Changes ### Embedding Providers - Add abstract `getModel()` method to base Embedding class - Implement `getModel()` in all providers: - OllamaEmbedding: returns config.model - LlamaCppEmbedding: returns config.model with fallback - OpenAIEmbedding: returns config.model - GeminiEmbedding: returns config.model with fallback - VoyageAIEmbedding: returns config.model ### Collection Naming - Add `EMBEDDING_STRICT_COLLECTION_NAMES` environment variable - Implement dual naming strategies in Context.getCollectionName(): - Legacy (default): `hybrid_code_chunks_<hash>` (backward compatible) - Strict: `hybrid_<provider>_<model>_<path_hash>_<unique_hash>` - Add `customCollectionName` support in ContextConfig - Ensure model names are sanitized for safe collection naming ### MCP Integration - Add `embeddingStrictCollectionNames` to ContextMcpConfig - Auto-set environment variable from MCP config - Add new variables to debug output: - MILVUS_TOKEN (shows length only for security) - MILVUS_COLLECTION_NAME - LLAMACPP_TIMEOUT - LLAMACPP_CODE_PREFIX - EMBEDDING_STRICT_COLLECTION_NAMES - Update help message with new configuration options - Add examples for strict collection naming usage ### Documentation - Update .env.example with collection naming configuration - Add comprehensive examples in MCP help text - Document all new environment variables ## Benefits - **Zero conflict risk**: Each provider+model combination gets unique collection - **Safe experimentation**: Switch providers without data contamination - **Backward compatible**: Legacy naming works by default - **Full isolation**: Ollama and LlamaCpp collections never intersect ## Example Collection Names - Ollama: `hybrid_ollama_nomic_embed_text_abc12345_def67890` - LlamaCpp: `hybrid_llamacpp_nomic_embed_code_Q4_1_gguf_abc12345_fed09876` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .env.example | 9 +++++ packages/core/src/context.ts | 36 ++++++++++++++++--- packages/core/src/embedding/base-embedding.ts | 6 ++++ .../core/src/embedding/gemini-embedding.ts | 4 +++ .../core/src/embedding/llamacpp-embedding.ts | 4 +++ .../core/src/embedding/ollama-embedding.ts | 4 +++ .../core/src/embedding/openai-embedding.ts | 4 +++ .../core/src/embedding/voyageai-embedding.ts | 4 +++ packages/mcp/src/config.ts | 25 ++++++++++++- packages/mcp/src/index.ts | 8 ++++- 10 files changed, 98 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index 8eb0266a..2d088dce 100644 --- a/.env.example +++ b/.env.example @@ -90,3 +90,12 @@ SPLITTER_TYPE=ast # Whether to use hybrid search mode. If true, it will use both dense vector and BM25; if false, it will use only dense vector search.
# HYBRID_MODE=true + +# ============================================================================= +# Collection Naming Configuration +# ============================================================================= + +# Whether to use strict collection naming that includes provider and model info +# This prevents conflicts when switching between different embedding providers/models +# If false (default), uses legacy naming for backward compatibility +# EMBEDDING_STRICT_COLLECTION_NAMES=false diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index 1af13058..93c1a7d3 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -94,6 +94,7 @@ export interface ContextConfig { ignorePatterns?: string[]; customExtensions?: string[]; // New: custom extensions from MCP customIgnorePatterns?: string[]; // New: custom ignore patterns from MCP + customCollectionName?: string; // New: custom collection name from MCP config } export class Context { @@ -103,6 +104,7 @@ export class Context { private supportedExtensions: string[]; private ignorePatterns: string[]; private synchronizers = new Map(); + private customCollectionName?: string; constructor(config: ContextConfig = {}) { // Initialize services @@ -145,6 +147,9 @@ export class Context { // Remove duplicates this.ignorePatterns = [...new Set(allIgnorePatterns)]; + // Store custom collection name if provided + this.customCollectionName = config.customCollectionName; + console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`); if (envCustomExtensions.length > 0) { console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`); @@ -229,14 +234,37 @@ export class Context { } /** - * Generate collection name based on codebase path and hybrid mode + * Generate collection name based on codebase path, provider, model and hybrid mode */ public getCollectionName(codebasePath: string): string { + // If custom collection name is provided, use it directly + if (this.customCollectionName) { + return this.customCollectionName; + } + const isHybrid = this.getIsHybrid(); const normalizedPath = path.resolve(codebasePath); - const hash = crypto.createHash('md5').update(normalizedPath).digest('hex'); - const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks'; - return `${prefix}_${hash.substring(0, 8)}`; + const pathHash = crypto.createHash('md5').update(normalizedPath).digest('hex'); + + // Check if strict collection naming is enabled + const strictNaming = envManager.get('EMBEDDING_STRICT_COLLECTION_NAMES')?.toLowerCase() === 'true'; + + if (strictNaming) { + // Generate collection name including provider and model to prevent conflicts + const provider = this.embedding.getProvider().toLowerCase(); + const model = this.embedding.getModel().replace(/[^a-zA-Z0-9]/g, '_'); // Sanitize model name + + // Create a comprehensive hash including provider and model to ensure uniqueness + const uniqueString = `${provider}_${model}_${normalizedPath}`; + const fullHash = crypto.createHash('md5').update(uniqueString).digest('hex'); + + const prefix = isHybrid === true ? 'hybrid' : 'code'; + return `${prefix}_${provider}_${model}_${pathHash.substring(0, 8)}_${fullHash.substring(0, 8)}`; + } else { + // Legacy collection naming (default behavior) + const prefix = isHybrid === true ? 
'hybrid_code_chunks' : 'code_chunks'; + return `${prefix}_${pathHash.substring(0, 8)}`; + } } /** diff --git a/packages/core/src/embedding/base-embedding.ts b/packages/core/src/embedding/base-embedding.ts index 18aae9f0..b4d3c913 100644 --- a/packages/core/src/embedding/base-embedding.ts +++ b/packages/core/src/embedding/base-embedding.ts @@ -73,4 +73,10 @@ export abstract class Embedding { * @returns Provider name */ abstract getProvider(): string; + + /** + * Get model name/identifier + * @returns Model name + */ + abstract getModel(): string; } \ No newline at end of file diff --git a/packages/core/src/embedding/gemini-embedding.ts b/packages/core/src/embedding/gemini-embedding.ts index 480c80d1..f1c9f618 100644 --- a/packages/core/src/embedding/gemini-embedding.ts +++ b/packages/core/src/embedding/gemini-embedding.ts @@ -119,6 +119,10 @@ export class GeminiEmbedding extends Embedding { return 'Gemini'; } + getModel(): string { + return this.config.model || 'gemini-embedding-001'; + } + /** * Set model type * @param model Model name diff --git a/packages/core/src/embedding/llamacpp-embedding.ts b/packages/core/src/embedding/llamacpp-embedding.ts index 98f557e6..94ad2ccf 100644 --- a/packages/core/src/embedding/llamacpp-embedding.ts +++ b/packages/core/src/embedding/llamacpp-embedding.ts @@ -297,6 +297,10 @@ export class LlamaCppEmbedding extends Embedding { return 'LlamaCpp'; } + getModel(): string { + return this.config.model || 'nomic-embed-code'; + } + /** * Ensure dimension is detected before making embedding requests */ diff --git a/packages/core/src/embedding/ollama-embedding.ts b/packages/core/src/embedding/ollama-embedding.ts index ef3ba070..c81b081a 100644 --- a/packages/core/src/embedding/ollama-embedding.ts +++ b/packages/core/src/embedding/ollama-embedding.ts @@ -132,6 +132,10 @@ export class OllamaEmbedding extends Embedding { return 'Ollama'; } + getModel(): string { + return this.config.model; + } + /** * Set model type and detect its dimension * @param model Model name diff --git a/packages/core/src/embedding/openai-embedding.ts b/packages/core/src/embedding/openai-embedding.ts index be0de117..476032a1 100644 --- a/packages/core/src/embedding/openai-embedding.ts +++ b/packages/core/src/embedding/openai-embedding.ts @@ -134,6 +134,10 @@ export class OpenAIEmbedding extends Embedding { return 'OpenAI'; } + getModel(): string { + return this.config.model; + } + /** * Set model type * @param model Model name diff --git a/packages/core/src/embedding/voyageai-embedding.ts b/packages/core/src/embedding/voyageai-embedding.ts index 9ad3ee89..00c88ddf 100644 --- a/packages/core/src/embedding/voyageai-embedding.ts +++ b/packages/core/src/embedding/voyageai-embedding.ts @@ -119,6 +119,10 @@ export class VoyageAIEmbedding extends Embedding { return 'VoyageAI'; } + getModel(): string { + return this.config.model; + } + /** * Set model type * @param model Model name diff --git a/packages/mcp/src/config.ts b/packages/mcp/src/config.ts index b2615512..407a30b5 100644 --- a/packages/mcp/src/config.ts +++ b/packages/mcp/src/config.ts @@ -23,6 +23,9 @@ export interface ContextMcpConfig { // Vector database configuration milvusAddress?: string; // Optional, can be auto-resolved from token milvusToken?: string; + milvusCollectionName?: string; // Optional, customizable collection name + // Collection naming configuration + embeddingStrictCollectionNames?: boolean; // Whether to include provider/model in collection names } // Legacy format (v1) - for backward compatibility @@ -122,9 +125,14 @@ 
export function createMcpConfig(): ContextMcpConfig { console.log(`[DEBUG] OLLAMA_MODEL: ${envManager.get('OLLAMA_MODEL') || 'NOT SET'}`); console.log(`[DEBUG] LLAMACPP_MODEL: ${envManager.get('LLAMACPP_MODEL') || 'NOT SET'}`); console.log(`[DEBUG] LLAMACPP_HOST: ${envManager.get('LLAMACPP_HOST') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_TIMEOUT: ${envManager.get('LLAMACPP_TIMEOUT') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_CODE_PREFIX: ${envManager.get('LLAMACPP_CODE_PREFIX') || 'NOT SET'}`); console.log(`[DEBUG] GEMINI_API_KEY: ${envManager.get('GEMINI_API_KEY') ? 'SET (length: ' + envManager.get('GEMINI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] OPENAI_API_KEY: ${envManager.get('OPENAI_API_KEY') ? 'SET (length: ' + envManager.get('OPENAI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] MILVUS_ADDRESS: ${envManager.get('MILVUS_ADDRESS') || 'NOT SET'}`); + console.log(`[DEBUG] MILVUS_TOKEN: ${envManager.get('MILVUS_TOKEN') ? 'SET (length: ' + envManager.get('MILVUS_TOKEN')!.length + ')' : 'NOT SET'}`); + console.log(`[DEBUG] MILVUS_COLLECTION_NAME: ${envManager.get('MILVUS_COLLECTION_NAME') || 'NOT SET'}`); + console.log(`[DEBUG] EMBEDDING_STRICT_COLLECTION_NAMES: ${envManager.get('EMBEDDING_STRICT_COLLECTION_NAMES') || 'NOT SET'}`); console.log(`[DEBUG] NODE_ENV: ${envManager.get('NODE_ENV') || 'NOT SET'}`); const config: ContextMcpConfig = { @@ -149,7 +157,10 @@ export function createMcpConfig(): ContextMcpConfig { llamacppCodePrefix: envManager.get('LLAMACPP_CODE_PREFIX') ? envManager.get('LLAMACPP_CODE_PREFIX') === 'true' : undefined, // Vector database configuration - address can be auto-resolved from token milvusAddress: envManager.get('MILVUS_ADDRESS'), // Optional, can be resolved from token - milvusToken: envManager.get('MILVUS_TOKEN') + milvusToken: envManager.get('MILVUS_TOKEN'), + milvusCollectionName: envManager.get('MILVUS_COLLECTION_NAME'), // Optional, customizable collection name + // Collection naming configuration + embeddingStrictCollectionNames: envManager.get('EMBEDDING_STRICT_COLLECTION_NAMES')?.toLowerCase() === 'true' }; return config; @@ -163,6 +174,8 @@ export function logConfigurationSummary(config: ContextMcpConfig): void { console.log(`[MCP] Embedding Provider: ${config.embeddingProvider}`); console.log(`[MCP] Embedding Model: ${config.embeddingModel}`); console.log(`[MCP] Milvus Address: ${config.milvusAddress || (config.milvusToken ? '[Auto-resolve from token]' : '[Not configured]')}`); + console.log(`[MCP] Milvus Collection: ${config.milvusCollectionName || '[Default: based on provider]'}`); + console.log(`[MCP] Strict Collection Names: ${config.embeddingStrictCollectionNames ? 
'Enabled (provider+model)' : 'Disabled (legacy)'}`); // Log provider-specific configuration without exposing sensitive data switch (config.embeddingProvider) { @@ -235,6 +248,10 @@ Environment Variables: Vector Database Configuration: MILVUS_ADDRESS Milvus address (optional, can be auto-resolved from token) MILVUS_TOKEN Milvus token (optional, used for authentication and address resolution) + MILVUS_COLLECTION_NAME Custom collection name (optional, defaults to provider-based name) + + Collection Naming Configuration: + EMBEDDING_STRICT_COLLECTION_NAMES Use strict collection naming with provider+model (default: false) Examples: # Start MCP server with OpenAI (default) and explicit Milvus address @@ -260,5 +277,11 @@ Examples: # Start MCP server with LlamaCpp and custom timeout (useful for slower hardware) EMBEDDING_PROVIDER=LlamaCpp LLAMACPP_TIMEOUT=60000 EMBEDDING_MODEL=nomic-embed-code MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with strict collection naming (prevents provider conflicts) + EMBEDDING_PROVIDER=Ollama EMBEDDING_MODEL=nomic-embed-text EMBEDDING_STRICT_COLLECTION_NAMES=true MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with LlamaCpp and strict collection naming + EMBEDDING_PROVIDER=LlamaCpp EMBEDDING_MODEL=nomic-embed-code EMBEDDING_STRICT_COLLECTION_NAMES=true MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest `); } \ No newline at end of file diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 8c4c3b28..67e16153 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -65,10 +65,16 @@ class ContextMcpServer { ...(config.milvusToken && { token: config.milvusToken }) }); + // Set collection naming strategy from config + if (config.embeddingStrictCollectionNames !== undefined) { + process.env.EMBEDDING_STRICT_COLLECTION_NAMES = config.embeddingStrictCollectionNames.toString(); + } + // Initialize Claude Context this.context = new Context({ embedding, - vectorDatabase + vectorDatabase, + customCollectionName: config.milvusCollectionName }); // Initialize managers From 46ca2fc4c6f578ff866be274b90aa05398b38423 Mon Sep 17 00:00:00 2001 From: spumer Date: Tue, 7 Oct 2025 07:41:57 +0500 Subject: [PATCH 4/4] docs: add collection naming configuration documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document new EMBEDDING_STRICT_COLLECTION_NAMES and MILVUS_COLLECTION_NAME environment variables in all relevant documentation files. 
## Updated Documentation ### Environment Variables Guide - Add MILVUS_COLLECTION_NAME variable description - Add EMBEDDING_STRICT_COLLECTION_NAMES variable with detailed explanation - Add collection naming modes comparison (legacy vs strict) - Add use cases and benefits for strict mode ### MCP README - Add new "Collection Naming Configuration" section - Document both naming modes with examples - Explain format differences: `hybrid_code_chunks_<hash>` vs `hybrid_<provider>_<model>_<path_hash>_<unique_hash>` - Recommend strict mode for multi-provider experimentation ## Benefits for Users - Clear understanding of collection naming behavior - Guidance on when to use strict mode - Prevention of data conflicts when switching providers - Complete reference for all configuration options 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/getting-started/environment-variables.md | 8 +++++++ packages/mcp/README.md | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/docs/getting-started/environment-variables.md b/docs/getting-started/environment-variables.md index 4ddde5c3..158505ec 100644 --- a/docs/getting-started/environment-variables.md +++ b/docs/getting-started/environment-variables.md @@ -47,6 +47,14 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp |----------|-------------|---------| | `MILVUS_TOKEN` | Milvus authentication token. Get [Zilliz Personal API Key](https://github.com/zilliztech/claude-context/blob/master/assets/signup_and_get_apikey.png) | Recommended | | `MILVUS_ADDRESS` | Milvus server address. Optional when using Zilliz Personal API Key | Auto-resolved from token | +| `MILVUS_COLLECTION_NAME` | Custom collection name (optional, overrides automatic naming) | Auto-generated | +| `EMBEDDING_STRICT_COLLECTION_NAMES` | Use strict collection naming with provider+model info to prevent conflicts | `false` | + +> **💡 Collection Naming:** +> - **Legacy mode** (default): Collections named `hybrid_code_chunks_<hash>` - same name for all providers +> - **Strict mode** (`EMBEDDING_STRICT_COLLECTION_NAMES=true`): Collections include provider and model, e.g. `hybrid_ollama_nomic_embed_text_<path_hash>_<unique_hash>` +> - **Benefits of strict mode**: Prevents data conflicts when switching between different embedding providers or models +> - **Use case**: Enable strict mode when experimenting with multiple providers (Ollama, LlamaCpp, etc.) on the same codebase ### Ollama (Optional) | Variable | Description | Default | diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 45ed03f4..2966d61b 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -228,6 +228,30 @@ CUSTOM_IGNORE_PATTERNS=temp/**,*.backup,private/**,uploads/** These settings work in combination with tool parameters - patterns from both sources will be merged together.
+#### Collection Naming Configuration (Optional) + +You can configure how collection names are generated to prevent conflicts when using multiple embedding providers: + +```bash +# Use strict collection naming that includes provider and model info +# This prevents data conflicts when switching between providers +EMBEDDING_STRICT_COLLECTION_NAMES=true + +# Or set a custom collection name manually (overrides all automatic naming) +MILVUS_COLLECTION_NAME=my_custom_collection +``` + +**Collection Naming Modes:** +- **Legacy mode** (default, `EMBEDDING_STRICT_COLLECTION_NAMES=false`): + - Format: `hybrid_code_chunks_<hash>` + - Same collection name for all providers (may cause conflicts) + +- **Strict mode** (`EMBEDDING_STRICT_COLLECTION_NAMES=true`): + - Format: `hybrid_<provider>_<model>_<path_hash>_<unique_hash>` + - Example: `hybrid_ollama_nomic_embed_text_abc12345_def67890` + - Prevents conflicts when switching between Ollama, LlamaCpp, OpenAI, etc. + - **Recommended** when experimenting with multiple embedding providers + ## Usage with MCP Clients
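To make the two naming modes concrete, the sketch below mirrors the logic this series adds to `Context.getCollectionName()` (PATCH 3). The helper name and example arguments are illustrative only; the real implementation also honors `MILVUS_COLLECTION_NAME` / `customCollectionName` before falling back to these formats.

```typescript
import * as crypto from 'crypto';
import * as path from 'path';

// Derive a collection name the same way the patched Context.getCollectionName() does.
function collectionName(codebasePath: string, provider: string, model: string, strict: boolean, hybrid = true): string {
    const normalizedPath = path.resolve(codebasePath);
    const pathHash = crypto.createHash('md5').update(normalizedPath).digest('hex');

    if (!strict) {
        // Legacy naming: only the codebase path contributes, so every provider shares it.
        return `${hybrid ? 'hybrid_code_chunks' : 'code_chunks'}_${pathHash.substring(0, 8)}`;
    }

    // Strict naming: provider + sanitized model + a hash over provider, model, and path.
    const safeModel = model.replace(/[^a-zA-Z0-9]/g, '_');
    const uniqueHash = crypto.createHash('md5')
        .update(`${provider.toLowerCase()}_${safeModel}_${normalizedPath}`)
        .digest('hex');
    return `${hybrid ? 'hybrid' : 'code'}_${provider.toLowerCase()}_${safeModel}_${pathHash.substring(0, 8)}_${uniqueHash.substring(0, 8)}`;
}

// e.g. "hybrid_ollama_nomic_embed_text_<path_hash>_<unique_hash>"
console.log(collectionName('/path/to/repo', 'Ollama', 'nomic-embed-text', true));
```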