From 09ec2db03a116e4ca195f92e5b894bb4d0a8b28e Mon Sep 17 00:00:00 2001 From: spumer Date: Sat, 20 Sep 2025 22:05:39 +0500 Subject: [PATCH 1/4] add llamacpp embedding provider support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add LlamaCppEmbedding class with OpenAI-compatible API - Support for local llama.cpp servers with nomic-embed-code model - Automatic code prefix for improved code search quality - Configurable timeout and dimension auto-detection - Integration with MCP configuration system - Environment variables: LLAMACPP_HOST, LLAMACPP_MODEL, LLAMACPP_TIMEOUT, LLAMACPP_CODE_PREFIX 🤖 Generated with Claude Code Co-Authored-By: Claude --- packages/core/src/embedding/index.ts | 3 +- .../core/src/embedding/llamacpp-embedding.ts | 460 ++++++++++++++++++ packages/mcp/src/config.ts | 49 +- packages/mcp/src/embedding.ts | 26 +- 4 files changed, 529 insertions(+), 9 deletions(-) create mode 100644 packages/core/src/embedding/llamacpp-embedding.ts diff --git a/packages/core/src/embedding/index.ts b/packages/core/src/embedding/index.ts index e6110941..3db35c1e 100644 --- a/packages/core/src/embedding/index.ts +++ b/packages/core/src/embedding/index.ts @@ -5,4 +5,5 @@ export * from './base-embedding'; export * from './openai-embedding'; export * from './voyageai-embedding'; export * from './ollama-embedding'; -export * from './gemini-embedding'; \ No newline at end of file +export * from './gemini-embedding'; +export * from './llamacpp-embedding'; \ No newline at end of file diff --git a/packages/core/src/embedding/llamacpp-embedding.ts b/packages/core/src/embedding/llamacpp-embedding.ts new file mode 100644 index 00000000..98f557e6 --- /dev/null +++ b/packages/core/src/embedding/llamacpp-embedding.ts @@ -0,0 +1,460 @@ +import { Embedding, EmbeddingVector } from './base-embedding'; + +export interface LlamaCppEmbeddingConfig { + host?: string; + model?: string; + codePrefix?: boolean; // Enable automatic code prefix + dimension?: number; // Optional dimension parameter + maxTokens?: number; // Optional max tokens parameter + timeout?: number; // Request timeout in milliseconds +} + +export class LlamaCppConfigurationError extends Error { + constructor(message: string) { + super(`LlamaCpp configuration error: ${message}`); + this.name = 'LlamaCppConfigurationError'; + } +} + +export class LlamaCppNetworkError extends Error { + constructor(message: string, public readonly originalError?: Error) { + super(`LlamaCpp network error: ${message}`); + this.name = 'LlamaCppNetworkError'; + } +} + +export class LlamaCppEmbedding extends Embedding { + private config: LlamaCppEmbeddingConfig; + private dimension: number = 768; // Default dimension + private dimensionDetected: boolean = false; + protected maxTokens: number = 8192; // Default for code models like nomic-embed-code + private host: string; + private codePrefix: string = "Represent this query for searching relevant code:"; + + constructor(config: LlamaCppEmbeddingConfig) { + super(); + + this.validateConfig(config); + + this.config = config; + this.host = this.normalizeHost(config.host || 'http://localhost:8080'); + + // Set dimension if provided + if (config.dimension) { + if (config.dimension <= 0) { + throw new LlamaCppConfigurationError('Dimension must be a positive number'); + } + this.dimension = config.dimension; + this.dimensionDetected = true; + } + + // Set max tokens if provided + if (config.maxTokens) { + if (config.maxTokens <= 0) { + throw new LlamaCppConfigurationError('Max tokens 
must be a positive number'); + } + this.maxTokens = config.maxTokens; + } + + // Enable code prefix by default for llamacpp (designed for code) + if (config.codePrefix === undefined) { + this.config.codePrefix = true; + } + } + + private validateConfig(config: LlamaCppEmbeddingConfig): void { + if (!config) { + throw new LlamaCppConfigurationError('Configuration object is required'); + } + + if (config.host !== undefined && typeof config.host !== 'string') { + throw new LlamaCppConfigurationError('Host must be a string'); + } + + if (config.model !== undefined && typeof config.model !== 'string') { + throw new LlamaCppConfigurationError('Model must be a string'); + } + + if (config.timeout !== undefined && (typeof config.timeout !== 'number' || config.timeout <= 0)) { + throw new LlamaCppConfigurationError('Timeout must be a positive number'); + } + + if (config.host) { + this.validateHostUrl(config.host); + } + } + + private validateHostUrl(host: string): void { + try { + const url = new URL(host); + if (!['http:', 'https:'].includes(url.protocol)) { + throw new LlamaCppConfigurationError(`Unsupported protocol: ${url.protocol}. Only HTTP and HTTPS are supported`); + } + } catch (error) { + if (error instanceof LlamaCppConfigurationError) { + throw error; + } + throw new LlamaCppConfigurationError(`Invalid host URL: ${host}`); + } + } + + private normalizeHost(host: string): string { + // Remove trailing slash for consistency + return host.replace(/\/$/, ''); + } + + private async makeRequest(url: string, body: any): Promise { + const timeout = this.config.timeout || 30000; // 30s default timeout + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeout); + + try { + console.log(`[LlamaCppEmbedding] Making request to ${url} with timeout ${timeout}ms`); + + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(body), + signal: controller.signal, + }); + + clearTimeout(timeoutId); + + if (!response.ok) { + let errorDetails: string; + try { + errorDetails = await response.text(); + } catch { + errorDetails = 'Unable to read error response'; + } + + const errorMessage = `HTTP ${response.status} (${response.statusText}): ${errorDetails}`; + console.error(`[LlamaCppEmbedding] Request failed: ${errorMessage}`); + throw new LlamaCppNetworkError(errorMessage); + } + + let responseData: any; + try { + responseData = await response.json(); + } catch (parseError) { + const errorMessage = 'Invalid JSON response from server'; + console.error(`[LlamaCppEmbedding] ${errorMessage}:`, parseError); + throw new LlamaCppNetworkError(errorMessage, parseError instanceof Error ? parseError : undefined); + } + + return responseData; + } catch (error) { + clearTimeout(timeoutId); + + // Re-throw specific errors without wrapping + if (error instanceof LlamaCppNetworkError || error instanceof LlamaCppConfigurationError) { + throw error; + } + + if (error instanceof Error) { + if (error.name === 'AbortError') { + const timeoutError = `Request timeout after ${timeout}ms - server at ${this.host} not responding`; + console.error(`[LlamaCppEmbedding] ${timeoutError}`); + throw new LlamaCppNetworkError(timeoutError, error); + } + + if (error.name === 'TypeError' && error.message.includes('fetch')) { + const connectionError = `Unable to connect to llama.cpp server at ${this.host}. 
Please ensure the server is running and accessible.`; + console.error(`[LlamaCppEmbedding] ${connectionError}`); + throw new LlamaCppNetworkError(connectionError, error); + } + + // Log original error with full stack trace + console.error(`[LlamaCppEmbedding] Unexpected error during request:`, error); + throw new LlamaCppNetworkError(`Unexpected error: ${error.message}`, error); + } + + // Fallback for non-Error objects + const unknownError = 'Unknown error occurred during request'; + console.error(`[LlamaCppEmbedding] ${unknownError}:`, error); + throw new LlamaCppNetworkError(unknownError); + } + } + + private preprocessTextForCode(text: string): string { + if (typeof text !== 'string') { + throw new LlamaCppConfigurationError('Text must be a string'); + } + + const processedText = this.preprocessText(text); + + // Add code prefix if enabled and not already present + if (this.config.codePrefix && !processedText.startsWith(this.codePrefix)) { + return `${this.codePrefix} ${processedText}`; + } + + return processedText; + } + + async embed(text: string): Promise { + // Preprocess the text with optional code prefix + const processedText = this.preprocessTextForCode(text); + + // Ensure dimension is detected + await this.ensureDimensionDetected(); + + const requestBody = { + input: processedText, + model: this.config.model || 'embedding-model', + }; + + const url = `${this.host}/v1/embeddings`; + const response = await this.makeRequest(url, requestBody); + + if (!response || typeof response !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: expected object'); + } + + if (!response.data || !Array.isArray(response.data)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid data array'); + } + + if (response.data.length === 0) { + throw new LlamaCppNetworkError('Invalid response format: empty data array'); + } + + const firstItem = response.data[0]; + if (!firstItem || typeof firstItem !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: invalid first data item'); + } + + if (!firstItem.embedding || !Array.isArray(firstItem.embedding)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid embedding array'); + } + + const embedding = response.data[0].embedding; + + return { + vector: embedding, + dimension: this.dimension + }; + } + + async embedBatch(texts: string[]): Promise { + if (!Array.isArray(texts)) { + throw new LlamaCppConfigurationError('Texts must be an array'); + } + + if (texts.length === 0) { + throw new LlamaCppConfigurationError('Texts array cannot be empty'); + } + + // Preprocess all texts with optional code prefix + const processedTexts = texts.map(text => this.preprocessTextForCode(text)); + + // Ensure dimension is detected + await this.ensureDimensionDetected(); + + const requestBody = { + input: processedTexts, + model: this.config.model || 'embedding-model', + }; + + const url = `${this.host}/v1/embeddings`; + const response = await this.makeRequest(url, requestBody); + + if (!response || typeof response !== 'object') { + throw new LlamaCppNetworkError('Invalid batch response format: expected object'); + } + + if (!response.data || !Array.isArray(response.data)) { + throw new LlamaCppNetworkError('Invalid batch response format: missing or invalid data array'); + } + + if (response.data.length === 0) { + throw new LlamaCppNetworkError('Invalid batch response format: empty data array'); + } + + return response.data.map((item: any, index: number) => { + if (!item || typeof 
item !== 'object') { + throw new LlamaCppNetworkError(`Invalid batch response format: invalid item at index ${index}`); + } + + if (!item.embedding || !Array.isArray(item.embedding)) { + throw new LlamaCppNetworkError(`Invalid batch response format: missing or invalid embedding at index ${index}`); + } + + return { + vector: item.embedding, + dimension: this.dimension + }; + }); + } + + getDimension(): number { + return this.dimension; + } + + getProvider(): string { + return 'LlamaCpp'; + } + + /** + * Ensure dimension is detected before making embedding requests + */ + private async ensureDimensionDetected(): Promise { + if (!this.dimensionDetected && !this.config.dimension) { + this.dimension = await this.detectDimension(); + this.dimensionDetected = true; + console.log(`[LlamaCppEmbedding] 📏 Detected embedding dimension: ${this.dimension} for model: ${this.config.model || 'unknown'}`); + } + } + + async detectDimension(testText: string = "test"): Promise { + console.log(`[LlamaCppEmbedding] Detecting embedding dimension...`); + + if (typeof testText !== 'string') { + throw new LlamaCppConfigurationError('Test text must be a string'); + } + + try { + // Use raw test text without code prefix for dimension detection + const processedText = this.preprocessText(testText); + + const requestBody = { + input: processedText, + model: this.config.model || 'embedding-model', + }; + + const url = `${this.host}/v1/embeddings`; + const response = await this.makeRequest(url, requestBody); + + if (!response || typeof response !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: expected object'); + } + + if (!response.data || !Array.isArray(response.data)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid data array'); + } + + if (response.data.length === 0) { + throw new LlamaCppNetworkError('Invalid response format: empty data array'); + } + + const firstItem = response.data[0]; + if (!firstItem || typeof firstItem !== 'object') { + throw new LlamaCppNetworkError('Invalid response format: invalid first data item'); + } + + if (!firstItem.embedding || !Array.isArray(firstItem.embedding)) { + throw new LlamaCppNetworkError('Invalid response format: missing or invalid embedding array'); + } + + const dimension = firstItem.embedding.length; + if (dimension <= 0) { + throw new LlamaCppNetworkError(`Invalid embedding dimension: ${dimension}`); + } + + console.log(`[LlamaCppEmbedding] Successfully detected embedding dimension: ${dimension}`); + return dimension; + } catch (error) { + if (error instanceof LlamaCppNetworkError || error instanceof LlamaCppConfigurationError) { + throw error; + } + + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + console.error(`[LlamaCppEmbedding] Failed to detect dimension:`, error); + throw new LlamaCppNetworkError(`Failed to detect embedding dimension: ${errorMessage}`, error instanceof Error ? 
error : undefined); + } + } + + /** + * Set the host URL for llama.cpp server + * @param host Host URL (e.g., 'http://localhost:8080') + */ + setHost(host: string): void { + if (typeof host !== 'string') { + throw new LlamaCppConfigurationError('Host must be a string'); + } + + this.validateHostUrl(host); + + this.host = this.normalizeHost(host); + this.config.host = host; + } + + /** + * Set the model name + * @param model Model name + */ + async setModel(model: string): Promise { + if (typeof model !== 'string') { + throw new LlamaCppConfigurationError('Model must be a string'); + } + + if (model.trim() === '') { + throw new LlamaCppConfigurationError('Model name cannot be empty'); + } + + this.config.model = model; + // Reset dimension detection when model changes + this.dimensionDetected = false; + if (!this.config.dimension) { + await this.ensureDimensionDetected(); + } + } + + /** + * Enable or disable automatic code prefix + * @param enabled Whether to enable code prefix + */ + setCodePrefix(enabled: boolean): void { + if (typeof enabled !== 'boolean') { + throw new LlamaCppConfigurationError('Code prefix enabled flag must be a boolean'); + } + + this.config.codePrefix = enabled; + } + + /** + * Set custom code prefix + * @param prefix Custom prefix text + */ + setCustomCodePrefix(prefix: string): void { + if (typeof prefix !== 'string') { + throw new LlamaCppConfigurationError('Code prefix must be a string'); + } + + if (prefix.trim() === '') { + throw new LlamaCppConfigurationError('Code prefix cannot be empty'); + } + + this.codePrefix = prefix; + this.config.codePrefix = true; + } + + /** + * Set request timeout + * @param timeout Timeout in milliseconds + */ + setTimeout(timeout: number): void { + if (typeof timeout !== 'number') { + throw new LlamaCppConfigurationError('Timeout must be a number'); + } + + if (timeout <= 0) { + throw new LlamaCppConfigurationError('Timeout must be a positive number'); + } + + if (timeout > 600000) { // 10 minutes max + throw new LlamaCppConfigurationError('Timeout cannot exceed 600000ms (10 minutes)'); + } + + this.config.timeout = timeout; + } + + /** + * Get current configuration + */ + getConfig(): LlamaCppEmbeddingConfig { + return { ...this.config }; + } +} \ No newline at end of file diff --git a/packages/mcp/src/config.ts b/packages/mcp/src/config.ts index 428f9474..b2615512 100644 --- a/packages/mcp/src/config.ts +++ b/packages/mcp/src/config.ts @@ -4,7 +4,7 @@ export interface ContextMcpConfig { name: string; version: string; // Embedding provider configuration - embeddingProvider: 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama'; + embeddingProvider: 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama' | 'LlamaCpp'; embeddingModel: string; // Provider-specific API keys openaiApiKey?: string; @@ -15,6 +15,11 @@ export interface ContextMcpConfig { // Ollama configuration ollamaModel?: string; ollamaHost?: string; + // LlamaCpp configuration + llamacppHost?: string; + llamacppModel?: string; + llamacppTimeout?: number; + llamacppCodePrefix?: boolean; // Vector database configuration milvusAddress?: string; // Optional, can be auto-resolved from token milvusToken?: string; @@ -78,6 +83,8 @@ export function getDefaultModelForProvider(provider: string): string { return 'gemini-embedding-001'; case 'Ollama': return 'nomic-embed-text'; + case 'LlamaCpp': + return 'nomic-embed-code'; default: return 'text-embedding-3-small'; } @@ -91,6 +98,11 @@ export function getEmbeddingModelForProvider(provider: string): string { const ollamaModel = 
envManager.get('OLLAMA_MODEL') || envManager.get('EMBEDDING_MODEL') || getDefaultModelForProvider(provider); console.log(`[DEBUG] 🎯 Ollama model selection: OLLAMA_MODEL=${envManager.get('OLLAMA_MODEL') || 'NOT SET'}, EMBEDDING_MODEL=${envManager.get('EMBEDDING_MODEL') || 'NOT SET'}, selected=${ollamaModel}`); return ollamaModel; + case 'LlamaCpp': + // For LlamaCpp, prioritize LLAMACPP_MODEL over EMBEDDING_MODEL + const llamacppModel = envManager.get('LLAMACPP_MODEL') || envManager.get('EMBEDDING_MODEL') || getDefaultModelForProvider(provider); + console.log(`[DEBUG] 🎯 LlamaCpp model selection: LLAMACPP_MODEL=${envManager.get('LLAMACPP_MODEL') || 'NOT SET'}, EMBEDDING_MODEL=${envManager.get('EMBEDDING_MODEL') || 'NOT SET'}, selected=${llamacppModel}`); + return llamacppModel; case 'OpenAI': case 'VoyageAI': case 'Gemini': @@ -108,6 +120,8 @@ export function createMcpConfig(): ContextMcpConfig { console.log(`[DEBUG] EMBEDDING_PROVIDER: ${envManager.get('EMBEDDING_PROVIDER') || 'NOT SET'}`); console.log(`[DEBUG] EMBEDDING_MODEL: ${envManager.get('EMBEDDING_MODEL') || 'NOT SET'}`); console.log(`[DEBUG] OLLAMA_MODEL: ${envManager.get('OLLAMA_MODEL') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_MODEL: ${envManager.get('LLAMACPP_MODEL') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_HOST: ${envManager.get('LLAMACPP_HOST') || 'NOT SET'}`); console.log(`[DEBUG] GEMINI_API_KEY: ${envManager.get('GEMINI_API_KEY') ? 'SET (length: ' + envManager.get('GEMINI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] OPENAI_API_KEY: ${envManager.get('OPENAI_API_KEY') ? 'SET (length: ' + envManager.get('OPENAI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] MILVUS_ADDRESS: ${envManager.get('MILVUS_ADDRESS') || 'NOT SET'}`); @@ -117,7 +131,7 @@ export function createMcpConfig(): ContextMcpConfig { name: envManager.get('MCP_SERVER_NAME') || "Context MCP Server", version: envManager.get('MCP_SERVER_VERSION') || "1.0.0", // Embedding provider configuration - embeddingProvider: (envManager.get('EMBEDDING_PROVIDER') as 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama') || 'OpenAI', + embeddingProvider: (envManager.get('EMBEDDING_PROVIDER') as 'OpenAI' | 'VoyageAI' | 'Gemini' | 'Ollama' | 'LlamaCpp') || 'OpenAI', embeddingModel: getEmbeddingModelForProvider(envManager.get('EMBEDDING_PROVIDER') || 'OpenAI'), // Provider-specific API keys openaiApiKey: envManager.get('OPENAI_API_KEY'), @@ -128,6 +142,11 @@ export function createMcpConfig(): ContextMcpConfig { // Ollama configuration ollamaModel: envManager.get('OLLAMA_MODEL'), ollamaHost: envManager.get('OLLAMA_HOST'), + // LlamaCpp configuration + llamacppHost: envManager.get('LLAMACPP_HOST'), + llamacppModel: envManager.get('LLAMACPP_MODEL'), + llamacppTimeout: envManager.get('LLAMACPP_TIMEOUT') ? parseInt(envManager.get('LLAMACPP_TIMEOUT')!, 10) : undefined, + llamacppCodePrefix: envManager.get('LLAMACPP_CODE_PREFIX') ? 
envManager.get('LLAMACPP_CODE_PREFIX') === 'true' : undefined, // Vector database configuration - address can be auto-resolved from token milvusAddress: envManager.get('MILVUS_ADDRESS'), // Optional, can be resolved from token milvusToken: envManager.get('MILVUS_TOKEN') @@ -166,6 +185,14 @@ export function logConfigurationSummary(config: ContextMcpConfig): void { console.log(`[MCP] Ollama Host: ${config.ollamaHost || 'http://127.0.0.1:11434'}`); console.log(`[MCP] Ollama Model: ${config.embeddingModel}`); break; + case 'LlamaCpp': + console.log(`[MCP] LlamaCpp Host: ${config.llamacppHost || 'http://localhost:8080'}`); + console.log(`[MCP] LlamaCpp Model: ${config.embeddingModel}`); + if (config.llamacppTimeout) { + console.log(`[MCP] LlamaCpp Timeout: ${config.llamacppTimeout}ms`); + } + console.log(`[MCP] LlamaCpp Code Prefix: ${config.llamacppCodePrefix !== false ? '✅ Enabled' : '❌ Disabled'}`); + break; } console.log(`[MCP] 🔧 Initializing server components...`); @@ -185,19 +212,25 @@ Environment Variables: MCP_SERVER_VERSION Server version Embedding Provider Configuration: - EMBEDDING_PROVIDER Embedding provider: OpenAI, VoyageAI, Gemini, Ollama (default: OpenAI) + EMBEDDING_PROVIDER Embedding provider: OpenAI, VoyageAI, Gemini, Ollama, LlamaCpp (default: OpenAI) EMBEDDING_MODEL Embedding model name (works for all providers) - + Provider-specific API Keys: OPENAI_API_KEY OpenAI API key (required for OpenAI provider) OPENAI_BASE_URL OpenAI API base URL (optional, for custom endpoints) VOYAGEAI_API_KEY VoyageAI API key (required for VoyageAI provider) GEMINI_API_KEY Google AI API key (required for Gemini provider) GEMINI_BASE_URL Gemini API base URL (optional, for custom endpoints) - + Ollama Configuration: OLLAMA_HOST Ollama server host (default: http://127.0.0.1:11434) OLLAMA_MODEL Ollama model name (alternative to EMBEDDING_MODEL for Ollama) + + LlamaCpp Configuration: + LLAMACPP_HOST LlamaCpp server host (default: http://localhost:8080) + LLAMACPP_MODEL LlamaCpp model name (alternative to EMBEDDING_MODEL for LlamaCpp) + LLAMACPP_TIMEOUT Request timeout in milliseconds (default: 30000) + LLAMACPP_CODE_PREFIX Enable automatic code prefix for embeddings (default: true) Vector Database Configuration: MILVUS_ADDRESS Milvus address (optional, can be auto-resolved from token) @@ -221,5 +254,11 @@ Examples: # Start MCP server with Ollama and specific model (using EMBEDDING_MODEL) EMBEDDING_PROVIDER=Ollama EMBEDDING_MODEL=nomic-embed-text MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with LlamaCpp and nomic-embed-code model + EMBEDDING_PROVIDER=LlamaCpp LLAMACPP_HOST=http://localhost:8080 LLAMACPP_MODEL=nomic-embed-code MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with LlamaCpp and custom timeout (useful for slower hardware) + EMBEDDING_PROVIDER=LlamaCpp LLAMACPP_TIMEOUT=60000 EMBEDDING_MODEL=nomic-embed-code MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest `); } \ No newline at end of file diff --git a/packages/mcp/src/embedding.ts b/packages/mcp/src/embedding.ts index 6ebd71a0..e113326d 100644 --- a/packages/mcp/src/embedding.ts +++ b/packages/mcp/src/embedding.ts @@ -1,8 +1,8 @@ -import { OpenAIEmbedding, VoyageAIEmbedding, GeminiEmbedding, OllamaEmbedding } from "@zilliz/claude-context-core"; +import { OpenAIEmbedding, VoyageAIEmbedding, GeminiEmbedding, OllamaEmbedding, LlamaCppEmbedding } from "@zilliz/claude-context-core"; import { ContextMcpConfig } from "./config.js"; // Helper function to 
create embedding instance based on provider -export function createEmbeddingInstance(config: ContextMcpConfig): OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding { +export function createEmbeddingInstance(config: ContextMcpConfig): OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding | LlamaCppEmbedding { console.log(`[EMBEDDING] Creating ${config.embeddingProvider} embedding instance...`); switch (config.embeddingProvider) { @@ -57,13 +57,29 @@ export function createEmbeddingInstance(config: ContextMcpConfig): OpenAIEmbeddi console.log(`[EMBEDDING] ✅ Ollama embedding instance created successfully`); return ollamaEmbedding; + case 'LlamaCpp': + const llamacppHost = config.llamacppHost || 'http://localhost:8080'; + console.log(`[EMBEDDING] 🔧 Configuring LlamaCpp with model: ${config.embeddingModel}, host: ${llamacppHost}`); + + const llamacppEmbeddingConfig = { + host: llamacppHost, + model: config.embeddingModel, + ...(config.llamacppTimeout !== undefined && { timeout: config.llamacppTimeout }), + ...(config.llamacppCodePrefix !== undefined && { codePrefix: config.llamacppCodePrefix }) + }; + + const llamacppEmbedding = new LlamaCppEmbedding(llamacppEmbeddingConfig); + console.log(`[EMBEDDING] ✅ LlamaCpp embedding instance created successfully`); + console.log(`[EMBEDDING] 📝 LlamaCpp configuration: timeout=${config.llamacppTimeout || 30000}ms, codePrefix=${config.llamacppCodePrefix !== false}`); + return llamacppEmbedding; + default: console.error(`[EMBEDDING] ❌ Unsupported embedding provider: ${config.embeddingProvider}`); throw new Error(`Unsupported embedding provider: ${config.embeddingProvider}`); } } -export function logEmbeddingProviderInfo(config: ContextMcpConfig, embedding: OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding): void { +export function logEmbeddingProviderInfo(config: ContextMcpConfig, embedding: OpenAIEmbedding | VoyageAIEmbedding | GeminiEmbedding | OllamaEmbedding | LlamaCppEmbedding): void { console.log(`[EMBEDDING] ✅ Successfully initialized ${config.embeddingProvider} embedding provider`); console.log(`[EMBEDDING] Provider details - Model: ${config.embeddingModel}, Dimension: ${embedding.getDimension()}`); @@ -81,5 +97,9 @@ export function logEmbeddingProviderInfo(config: ContextMcpConfig, embedding: Op case 'Ollama': console.log(`[EMBEDDING] Ollama configuration - Host: ${config.ollamaHost || 'http://127.0.0.1:11434'}, Model: ${config.embeddingModel}`); break; + case 'LlamaCpp': + console.log(`[EMBEDDING] LlamaCpp configuration - Host: ${config.llamacppHost || 'http://localhost:8080'}, Model: ${config.embeddingModel}`); + console.log(`[EMBEDDING] LlamaCpp advanced - Timeout: ${config.llamacppTimeout || 30000}ms, Code Prefix: ${config.llamacppCodePrefix !== false ? 
'Enabled' : 'Disabled'}`); + break; } } \ No newline at end of file From f4adaf73ff340c4295cf513667c03b054f2989ec Mon Sep 17 00:00:00 2001 From: spumer Date: Sat, 20 Sep 2025 22:13:08 +0500 Subject: [PATCH 2/4] docs: add llamacpp provider documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add LlamaCpp to supported embedding providers in main README - Update environment variables documentation with LlamaCpp options - Add comprehensive LlamaCpp configuration guide to MCP README - Include setup instructions for local inference on consumer hardware - Add configuration examples for various MCP clients - Document LlamaCpp's goal: enable large model inference on Apple Silicon and desktop GPUs 🤖 Generated with Claude Code Co-Authored-By: Claude --- README.md | 2 +- docs/getting-started/environment-variables.md | 10 ++- packages/mcp/README.md | 68 ++++++++++++++++++- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ebacef1d..24653c4e 100644 --- a/README.md +++ b/README.md @@ -543,7 +543,7 @@ Claude Context is a monorepo containing three main packages: ### Supported Technologies -- **Embedding Providers**: [OpenAI](https://openai.com), [VoyageAI](https://voyageai.com), [Ollama](https://ollama.ai), [Gemini](https://gemini.google.com) +- **Embedding Providers**: [OpenAI](https://openai.com), [VoyageAI](https://voyageai.com), [Ollama](https://ollama.ai), [Gemini](https://gemini.google.com), [LlamaCpp](https://github.com/ggerganov/llama.cpp) (local inference on consumer hardware) - **Vector Databases**: [Milvus](https://milvus.io) or [Zilliz Cloud](https://zilliz.com/cloud)(fully managed vector database as a service) - **Code Splitters**: AST-based splitter (with automatic fallback), LangChain character-based splitter - **Languages**: TypeScript, JavaScript, Python, Java, C++, C#, Go, Rust, PHP, Ruby, Swift, Kotlin, Scala, Markdown diff --git a/docs/getting-started/environment-variables.md b/docs/getting-started/environment-variables.md index d2b813df..4ddde5c3 100644 --- a/docs/getting-started/environment-variables.md +++ b/docs/getting-started/environment-variables.md @@ -20,7 +20,7 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp ### Embedding Provider | Variable | Description | Default | |----------|-------------|---------| -| `EMBEDDING_PROVIDER` | Provider: `OpenAI`, `VoyageAI`, `Gemini`, `Ollama` | `OpenAI` | +| `EMBEDDING_PROVIDER` | Provider: `OpenAI`, `VoyageAI`, `Gemini`, `Ollama`, `LlamaCpp` | `OpenAI` | | `EMBEDDING_MODEL` | Embedding model name (works for all providers) | Provider-specific default | | `OPENAI_API_KEY` | OpenAI API key | Required for OpenAI | | `OPENAI_BASE_URL` | OpenAI API base URL (optional, for custom endpoints) | `https://api.openai.com/v1` | @@ -54,6 +54,14 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp | `OLLAMA_HOST` | Ollama server URL | `http://127.0.0.1:11434` | | `OLLAMA_MODEL`(alternative to `EMBEDDING_MODEL`) | Model name | | +### LlamaCpp (Optional) +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMACPP_HOST` | LlamaCpp server URL | `http://localhost:8080` | +| `LLAMACPP_MODEL` (alternative to `EMBEDDING_MODEL`) | Model name | | +| `LLAMACPP_TIMEOUT` | Request timeout in milliseconds | `30000` | +| `LLAMACPP_CODE_PREFIX` | Enable automatic code prefix for embeddings | `true` | + ### Advanced Configuration | Variable | Description | Default | diff --git 
a/packages/mcp/README.md b/packages/mcp/README.md index 4a562af5..45ed03f4 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -20,7 +20,7 @@ Model Context Protocol (MCP) allows you to integrate Claude Context with your fa Before using the MCP server, make sure you have: -- API key for your chosen embedding provider (OpenAI, VoyageAI, Gemini, or Ollama setup) +- API key for your chosen embedding provider (OpenAI, VoyageAI, Gemini, Ollama, or LlamaCpp setup) - Milvus vector database (local or cloud) > 💡 **Setup Help:** See the [main project setup guide](../../README.md#-quick-start) for detailed installation instructions. @@ -34,7 +34,7 @@ Claude Context MCP supports multiple embedding providers. Choose the one that be > 📋 **Quick Reference**: For a complete list of environment variables and their descriptions, see the [Environment Variables Guide](../../docs/getting-started/environment-variables.md). ```bash -# Supported providers: OpenAI, VoyageAI, Gemini, Ollama +# Supported providers: OpenAI, VoyageAI, Gemini, Ollama, LlamaCpp EMBEDDING_PROVIDER=OpenAI ``` @@ -149,6 +149,51 @@ OLLAMA_HOST=http://127.0.0.1:11434 +
+5. LlamaCpp Configuration (Local/Self-hosted) + +LlamaCpp enables running large language models locally on consumer hardware including Apple Silicon, desktop GPUs, and CPU-only systems with state-of-the-art performance. It allows you to run embeddings locally with GGUF models without sending data to external services. + +```bash +# Required: Specify which LlamaCpp model to use +EMBEDDING_MODEL=nomic-embed-code + +# Optional: Specify LlamaCpp host (default: http://localhost:8080) +LLAMACPP_HOST=http://localhost:8080 + +# Optional: Request timeout in milliseconds (default: 30000) +LLAMACPP_TIMEOUT=30000 + +# Optional: Enable code prefix for better code search (default: true) +LLAMACPP_CODE_PREFIX=true +``` + +**Setup Instructions:** + +1. Install llama.cpp from [GitHub](https://github.com/ggerganov/llama.cpp) +2. Download the GGUF embedding model (e.g., nomic-embed-code): + + ```bash + # Example: Download nomic-embed-code model + curl -L "https://huggingface.co/nomic-ai/nomic-embed-code-GGUF/resolve/main/nomic-embed-code.Q4_1.gguf" -o nomic-embed-code.Q4_1.gguf + ``` + +3. Start the llama.cpp server with embeddings: + + ```bash + llama-server -m nomic-embed-code.Q4_1.gguf --embeddings --pooling last + ``` + +4. Verify the server is running: + + ```bash + curl http://localhost:8080/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{"model": "nomic-embed-code", "input": "test"}' + ``` + +
+ #### Get a free vector database on Zilliz Cloud Claude Context needs a vector database. You can [sign up](https://cloud.zilliz.com/signup?utm_source=github&utm_medium=referral&utm_campaign=2507-codecontext-readme) on Zilliz Cloud to get an API key. @@ -354,6 +399,25 @@ Pasting the following configuration into your Cursor `~/.cursor/mcp.json` file i } ``` +**LlamaCpp Configuration:** + +```json +{ + "mcpServers": { + "claude-context": { + "command": "npx", + "args": ["-y", "@zilliz/claude-context-mcp@latest"], + "env": { + "EMBEDDING_PROVIDER": "LlamaCpp", + "EMBEDDING_MODEL": "nomic-embed-code", + "LLAMACPP_HOST": "http://localhost:8080", + "MILVUS_TOKEN": "your-zilliz-cloud-api-key" + } + } + } +} +``` +
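For readers evaluating this series, here is a minimal usage sketch of the provider added in PATCH 1. It assumes the `LlamaCppEmbedding` export from `@zilliz/claude-context-core` as introduced above; the host, model, timeout, and sample inputs are placeholders rather than project defaults beyond what the patch documents.

```typescript
import { LlamaCppEmbedding } from '@zilliz/claude-context-core';

// Point the provider at a local llama.cpp server started with `--embeddings`.
const embedding = new LlamaCppEmbedding({
    host: 'http://localhost:8080',   // LLAMACPP_HOST
    model: 'nomic-embed-code',       // LLAMACPP_MODEL
    timeout: 60000,                  // generous timeout for slower hardware (ms)
    codePrefix: true,                // prepend the code-search prefix to inputs
});

async function main() {
    // Single input: the embedding dimension is auto-detected on the first request.
    const result = await embedding.embed('function that parses JSON safely');
    console.log(embedding.getProvider(), embedding.getDimension(), result.vector.length);

    // Batch input: sent as one POST to /v1/embeddings with an array payload.
    const batch = await embedding.embedBatch(['const a = 1;', 'def add(a, b): return a + b']);
    console.log(`embedded ${batch.length} chunks`);
}

main().catch(console.error);
```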
From 895d7be711df8bdc11f7b37802f0fa06ecb46f3d Mon Sep 17 00:00:00 2001 From: spumer Date: Tue, 7 Oct 2025 07:31:30 +0500 Subject: [PATCH 3/4] feat: add strict collection naming with provider and model isolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add configurable collection naming strategy to prevent conflicts between different embedding providers and models. This ensures complete isolation when switching between providers like Ollama and LlamaCpp. ## Core Changes ### Embedding Providers - Add abstract `getModel()` method to base Embedding class - Implement `getModel()` in all providers: - OllamaEmbedding: returns config.model - LlamaCppEmbedding: returns config.model with fallback - OpenAIEmbedding: returns config.model - GeminiEmbedding: returns config.model with fallback - VoyageAIEmbedding: returns config.model ### Collection Naming - Add `EMBEDDING_STRICT_COLLECTION_NAMES` environment variable - Implement dual naming strategies in Context.getCollectionName(): - Legacy (default): `hybrid_code_chunks_<hash>` (backward compatible) - Strict: `hybrid_<provider>_<model>_<path_hash>_<unique_hash>` - Add `customCollectionName` support in ContextConfig - Ensure model names are sanitized for safe collection naming ### MCP Integration - Add `embeddingStrictCollectionNames` to ContextMcpConfig - Auto-set environment variable from MCP config - Add new variables to debug output: - MILVUS_TOKEN (shows length only for security) - MILVUS_COLLECTION_NAME - LLAMACPP_TIMEOUT - LLAMACPP_CODE_PREFIX - EMBEDDING_STRICT_COLLECTION_NAMES - Update help message with new configuration options - Add examples for strict collection naming usage ### Documentation - Update .env.example with collection naming configuration - Add comprehensive examples in MCP help text - Document all new environment variables ## Benefits - **Zero conflict risk**: Each provider+model combination gets unique collection - **Safe experimentation**: Switch providers without data contamination - **Backward compatible**: Legacy naming works by default - **Full isolation**: Ollama and LlamaCpp collections never intersect ## Example Collection Names - Ollama: `hybrid_ollama_nomic_embed_text_abc12345_def67890` - LlamaCpp: `hybrid_llamacpp_nomic_embed_code_Q4_1_gguf_abc12345_fed09876` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .env.example | 9 +++++ packages/core/src/context.ts | 36 ++++++++++++++++--- packages/core/src/embedding/base-embedding.ts | 6 ++++ .../core/src/embedding/gemini-embedding.ts | 4 +++ .../core/src/embedding/llamacpp-embedding.ts | 4 +++ .../core/src/embedding/ollama-embedding.ts | 4 +++ .../core/src/embedding/openai-embedding.ts | 4 +++ .../core/src/embedding/voyageai-embedding.ts | 4 +++ packages/mcp/src/config.ts | 25 ++++++++++++- packages/mcp/src/index.ts | 8 ++++- 10 files changed, 98 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index 8eb0266a..2d088dce 100644 --- a/.env.example +++ b/.env.example @@ -90,3 +90,12 @@ SPLITTER_TYPE=ast # Whether to use hybrid search mode. If true, it will use both dense vector and BM25; if false, it will use only dense vector search.
# HYBRID_MODE=true + +# ============================================================================= +# Collection Naming Configuration +# ============================================================================= + +# Whether to use strict collection naming that includes provider and model info +# This prevents conflicts when switching between different embedding providers/models +# If false (default), uses legacy naming for backward compatibility +# EMBEDDING_STRICT_COLLECTION_NAMES=false diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index 1af13058..93c1a7d3 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -94,6 +94,7 @@ export interface ContextConfig { ignorePatterns?: string[]; customExtensions?: string[]; // New: custom extensions from MCP customIgnorePatterns?: string[]; // New: custom ignore patterns from MCP + customCollectionName?: string; // New: custom collection name from MCP config } export class Context { @@ -103,6 +104,7 @@ export class Context { private supportedExtensions: string[]; private ignorePatterns: string[]; private synchronizers = new Map(); + private customCollectionName?: string; constructor(config: ContextConfig = {}) { // Initialize services @@ -145,6 +147,9 @@ export class Context { // Remove duplicates this.ignorePatterns = [...new Set(allIgnorePatterns)]; + // Store custom collection name if provided + this.customCollectionName = config.customCollectionName; + console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`); if (envCustomExtensions.length > 0) { console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`); @@ -229,14 +234,37 @@ export class Context { } /** - * Generate collection name based on codebase path and hybrid mode + * Generate collection name based on codebase path, provider, model and hybrid mode */ public getCollectionName(codebasePath: string): string { + // If custom collection name is provided, use it directly + if (this.customCollectionName) { + return this.customCollectionName; + } + const isHybrid = this.getIsHybrid(); const normalizedPath = path.resolve(codebasePath); - const hash = crypto.createHash('md5').update(normalizedPath).digest('hex'); - const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks'; - return `${prefix}_${hash.substring(0, 8)}`; + const pathHash = crypto.createHash('md5').update(normalizedPath).digest('hex'); + + // Check if strict collection naming is enabled + const strictNaming = envManager.get('EMBEDDING_STRICT_COLLECTION_NAMES')?.toLowerCase() === 'true'; + + if (strictNaming) { + // Generate collection name including provider and model to prevent conflicts + const provider = this.embedding.getProvider().toLowerCase(); + const model = this.embedding.getModel().replace(/[^a-zA-Z0-9]/g, '_'); // Sanitize model name + + // Create a comprehensive hash including provider and model to ensure uniqueness + const uniqueString = `${provider}_${model}_${normalizedPath}`; + const fullHash = crypto.createHash('md5').update(uniqueString).digest('hex'); + + const prefix = isHybrid === true ? 'hybrid' : 'code'; + return `${prefix}_${provider}_${model}_${pathHash.substring(0, 8)}_${fullHash.substring(0, 8)}`; + } else { + // Legacy collection naming (default behavior) + const prefix = isHybrid === true ? 
'hybrid_code_chunks' : 'code_chunks'; + return `${prefix}_${pathHash.substring(0, 8)}`; + } } /** diff --git a/packages/core/src/embedding/base-embedding.ts b/packages/core/src/embedding/base-embedding.ts index 18aae9f0..b4d3c913 100644 --- a/packages/core/src/embedding/base-embedding.ts +++ b/packages/core/src/embedding/base-embedding.ts @@ -73,4 +73,10 @@ export abstract class Embedding { * @returns Provider name */ abstract getProvider(): string; + + /** + * Get model name/identifier + * @returns Model name + */ + abstract getModel(): string; } \ No newline at end of file diff --git a/packages/core/src/embedding/gemini-embedding.ts b/packages/core/src/embedding/gemini-embedding.ts index 480c80d1..f1c9f618 100644 --- a/packages/core/src/embedding/gemini-embedding.ts +++ b/packages/core/src/embedding/gemini-embedding.ts @@ -119,6 +119,10 @@ export class GeminiEmbedding extends Embedding { return 'Gemini'; } + getModel(): string { + return this.config.model || 'gemini-embedding-001'; + } + /** * Set model type * @param model Model name diff --git a/packages/core/src/embedding/llamacpp-embedding.ts b/packages/core/src/embedding/llamacpp-embedding.ts index 98f557e6..94ad2ccf 100644 --- a/packages/core/src/embedding/llamacpp-embedding.ts +++ b/packages/core/src/embedding/llamacpp-embedding.ts @@ -297,6 +297,10 @@ export class LlamaCppEmbedding extends Embedding { return 'LlamaCpp'; } + getModel(): string { + return this.config.model || 'nomic-embed-code'; + } + /** * Ensure dimension is detected before making embedding requests */ diff --git a/packages/core/src/embedding/ollama-embedding.ts b/packages/core/src/embedding/ollama-embedding.ts index ef3ba070..c81b081a 100644 --- a/packages/core/src/embedding/ollama-embedding.ts +++ b/packages/core/src/embedding/ollama-embedding.ts @@ -132,6 +132,10 @@ export class OllamaEmbedding extends Embedding { return 'Ollama'; } + getModel(): string { + return this.config.model; + } + /** * Set model type and detect its dimension * @param model Model name diff --git a/packages/core/src/embedding/openai-embedding.ts b/packages/core/src/embedding/openai-embedding.ts index be0de117..476032a1 100644 --- a/packages/core/src/embedding/openai-embedding.ts +++ b/packages/core/src/embedding/openai-embedding.ts @@ -134,6 +134,10 @@ export class OpenAIEmbedding extends Embedding { return 'OpenAI'; } + getModel(): string { + return this.config.model; + } + /** * Set model type * @param model Model name diff --git a/packages/core/src/embedding/voyageai-embedding.ts b/packages/core/src/embedding/voyageai-embedding.ts index 9ad3ee89..00c88ddf 100644 --- a/packages/core/src/embedding/voyageai-embedding.ts +++ b/packages/core/src/embedding/voyageai-embedding.ts @@ -119,6 +119,10 @@ export class VoyageAIEmbedding extends Embedding { return 'VoyageAI'; } + getModel(): string { + return this.config.model; + } + /** * Set model type * @param model Model name diff --git a/packages/mcp/src/config.ts b/packages/mcp/src/config.ts index b2615512..407a30b5 100644 --- a/packages/mcp/src/config.ts +++ b/packages/mcp/src/config.ts @@ -23,6 +23,9 @@ export interface ContextMcpConfig { // Vector database configuration milvusAddress?: string; // Optional, can be auto-resolved from token milvusToken?: string; + milvusCollectionName?: string; // Optional, customizable collection name + // Collection naming configuration + embeddingStrictCollectionNames?: boolean; // Whether to include provider/model in collection names } // Legacy format (v1) - for backward compatibility @@ -122,9 +125,14 @@ 
export function createMcpConfig(): ContextMcpConfig { console.log(`[DEBUG] OLLAMA_MODEL: ${envManager.get('OLLAMA_MODEL') || 'NOT SET'}`); console.log(`[DEBUG] LLAMACPP_MODEL: ${envManager.get('LLAMACPP_MODEL') || 'NOT SET'}`); console.log(`[DEBUG] LLAMACPP_HOST: ${envManager.get('LLAMACPP_HOST') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_TIMEOUT: ${envManager.get('LLAMACPP_TIMEOUT') || 'NOT SET'}`); + console.log(`[DEBUG] LLAMACPP_CODE_PREFIX: ${envManager.get('LLAMACPP_CODE_PREFIX') || 'NOT SET'}`); console.log(`[DEBUG] GEMINI_API_KEY: ${envManager.get('GEMINI_API_KEY') ? 'SET (length: ' + envManager.get('GEMINI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] OPENAI_API_KEY: ${envManager.get('OPENAI_API_KEY') ? 'SET (length: ' + envManager.get('OPENAI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] MILVUS_ADDRESS: ${envManager.get('MILVUS_ADDRESS') || 'NOT SET'}`); + console.log(`[DEBUG] MILVUS_TOKEN: ${envManager.get('MILVUS_TOKEN') ? 'SET (length: ' + envManager.get('MILVUS_TOKEN')!.length + ')' : 'NOT SET'}`); + console.log(`[DEBUG] MILVUS_COLLECTION_NAME: ${envManager.get('MILVUS_COLLECTION_NAME') || 'NOT SET'}`); + console.log(`[DEBUG] EMBEDDING_STRICT_COLLECTION_NAMES: ${envManager.get('EMBEDDING_STRICT_COLLECTION_NAMES') || 'NOT SET'}`); console.log(`[DEBUG] NODE_ENV: ${envManager.get('NODE_ENV') || 'NOT SET'}`); const config: ContextMcpConfig = { @@ -149,7 +157,10 @@ export function createMcpConfig(): ContextMcpConfig { llamacppCodePrefix: envManager.get('LLAMACPP_CODE_PREFIX') ? envManager.get('LLAMACPP_CODE_PREFIX') === 'true' : undefined, // Vector database configuration - address can be auto-resolved from token milvusAddress: envManager.get('MILVUS_ADDRESS'), // Optional, can be resolved from token - milvusToken: envManager.get('MILVUS_TOKEN') + milvusToken: envManager.get('MILVUS_TOKEN'), + milvusCollectionName: envManager.get('MILVUS_COLLECTION_NAME'), // Optional, customizable collection name + // Collection naming configuration + embeddingStrictCollectionNames: envManager.get('EMBEDDING_STRICT_COLLECTION_NAMES')?.toLowerCase() === 'true' }; return config; @@ -163,6 +174,8 @@ export function logConfigurationSummary(config: ContextMcpConfig): void { console.log(`[MCP] Embedding Provider: ${config.embeddingProvider}`); console.log(`[MCP] Embedding Model: ${config.embeddingModel}`); console.log(`[MCP] Milvus Address: ${config.milvusAddress || (config.milvusToken ? '[Auto-resolve from token]' : '[Not configured]')}`); + console.log(`[MCP] Milvus Collection: ${config.milvusCollectionName || '[Default: based on provider]'}`); + console.log(`[MCP] Strict Collection Names: ${config.embeddingStrictCollectionNames ? 
'Enabled (provider+model)' : 'Disabled (legacy)'}`); // Log provider-specific configuration without exposing sensitive data switch (config.embeddingProvider) { @@ -235,6 +248,10 @@ Environment Variables: Vector Database Configuration: MILVUS_ADDRESS Milvus address (optional, can be auto-resolved from token) MILVUS_TOKEN Milvus token (optional, used for authentication and address resolution) + MILVUS_COLLECTION_NAME Custom collection name (optional, defaults to provider-based name) + + Collection Naming Configuration: + EMBEDDING_STRICT_COLLECTION_NAMES Use strict collection naming with provider+model (default: false) Examples: # Start MCP server with OpenAI (default) and explicit Milvus address @@ -260,5 +277,11 @@ Examples: # Start MCP server with LlamaCpp and custom timeout (useful for slower hardware) EMBEDDING_PROVIDER=LlamaCpp LLAMACPP_TIMEOUT=60000 EMBEDDING_MODEL=nomic-embed-code MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with strict collection naming (prevents provider conflicts) + EMBEDDING_PROVIDER=Ollama EMBEDDING_MODEL=nomic-embed-text EMBEDDING_STRICT_COLLECTION_NAMES=true MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest + + # Start MCP server with LlamaCpp and strict collection naming + EMBEDDING_PROVIDER=LlamaCpp EMBEDDING_MODEL=nomic-embed-code EMBEDDING_STRICT_COLLECTION_NAMES=true MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest `); } \ No newline at end of file diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 8c4c3b28..67e16153 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -65,10 +65,16 @@ class ContextMcpServer { ...(config.milvusToken && { token: config.milvusToken }) }); + // Set collection naming strategy from config + if (config.embeddingStrictCollectionNames !== undefined) { + process.env.EMBEDDING_STRICT_COLLECTION_NAMES = config.embeddingStrictCollectionNames.toString(); + } + // Initialize Claude Context this.context = new Context({ embedding, - vectorDatabase + vectorDatabase, + customCollectionName: config.milvusCollectionName }); // Initialize managers From 46ca2fc4c6f578ff866be274b90aa05398b38423 Mon Sep 17 00:00:00 2001 From: spumer Date: Tue, 7 Oct 2025 07:41:57 +0500 Subject: [PATCH 4/4] docs: add collection naming configuration documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document new EMBEDDING_STRICT_COLLECTION_NAMES and MILVUS_COLLECTION_NAME environment variables in all relevant documentation files. 
## Updated Documentation ### Environment Variables Guide - Add MILVUS_COLLECTION_NAME variable description - Add EMBEDDING_STRICT_COLLECTION_NAMES variable with detailed explanation - Add collection naming modes comparison (legacy vs strict) - Add use cases and benefits for strict mode ### MCP README - Add new "Collection Naming Configuration" section - Document both naming modes with examples - Explain format differences: `hybrid_code_chunks_<hash>` vs `hybrid_<provider>_<model>_<path_hash>_<unique_hash>` - Recommend strict mode for multi-provider experimentation ## Benefits for Users - Clear understanding of collection naming behavior - Guidance on when to use strict mode - Prevention of data conflicts when switching providers - Complete reference for all configuration options 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/getting-started/environment-variables.md | 8 +++++++ packages/mcp/README.md | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/docs/getting-started/environment-variables.md b/docs/getting-started/environment-variables.md index 4ddde5c3..158505ec 100644 --- a/docs/getting-started/environment-variables.md +++ b/docs/getting-started/environment-variables.md @@ -47,6 +47,14 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp |----------|-------------|---------| | `MILVUS_TOKEN` | Milvus authentication token. Get [Zilliz Personal API Key](https://github.com/zilliztech/claude-context/blob/master/assets/signup_and_get_apikey.png) | Recommended | | `MILVUS_ADDRESS` | Milvus server address. Optional when using Zilliz Personal API Key | Auto-resolved from token | +| `MILVUS_COLLECTION_NAME` | Custom collection name (optional, overrides automatic naming) | Auto-generated | +| `EMBEDDING_STRICT_COLLECTION_NAMES` | Use strict collection naming with provider+model info to prevent conflicts | `false` | + +> **💡 Collection Naming:** +> - **Legacy mode** (default): Collections named `hybrid_code_chunks_<hash>` - same name for all providers +> - **Strict mode** (`EMBEDDING_STRICT_COLLECTION_NAMES=true`): Collections include provider and model, e.g. `hybrid_ollama_nomic_embed_text_<path_hash>_<unique_hash>` +> - **Benefits of strict mode**: Prevents data conflicts when switching between different embedding providers or models +> - **Use case**: Enable strict mode when experimenting with multiple providers (Ollama, LlamaCpp, etc.) on the same codebase ### Ollama (Optional) | Variable | Description | Default | diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 45ed03f4..2966d61b 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -228,6 +228,30 @@ CUSTOM_IGNORE_PATTERNS=temp/**,*.backup,private/**,uploads/** These settings work in combination with tool parameters - patterns from both sources will be merged together.
+#### Collection Naming Configuration (Optional) + +You can configure how collection names are generated to prevent conflicts when using multiple embedding providers: + +```bash +# Use strict collection naming that includes provider and model info +# This prevents data conflicts when switching between providers +EMBEDDING_STRICT_COLLECTION_NAMES=true + +# Or set a custom collection name manually (overrides all automatic naming) +MILVUS_COLLECTION_NAME=my_custom_collection +``` + +**Collection Naming Modes:** +- **Legacy mode** (default, `EMBEDDING_STRICT_COLLECTION_NAMES=false`): + - Format: `hybrid_code_chunks_<hash>` + - Same collection name for all providers (may cause conflicts) + +- **Strict mode** (`EMBEDDING_STRICT_COLLECTION_NAMES=true`): + - Format: `hybrid_<provider>_<model>_<path_hash>_<unique_hash>` + - Example: `hybrid_ollama_nomic_embed_text_abc12345_def67890` + - Prevents conflicts when switching between Ollama, LlamaCpp, OpenAI, etc. + - **Recommended** when experimenting with multiple embedding providers + ## Usage with MCP Clients
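To make the two naming modes concrete, the sketch below mirrors the logic this series adds to `Context.getCollectionName()` (PATCH 3). The helper name and example arguments are illustrative only; the real implementation also honors `MILVUS_COLLECTION_NAME` / `customCollectionName` before falling back to these formats.

```typescript
import * as crypto from 'crypto';
import * as path from 'path';

// Derive a collection name the same way the patched Context.getCollectionName() does.
function collectionName(codebasePath: string, provider: string, model: string, strict: boolean, hybrid = true): string {
    const normalizedPath = path.resolve(codebasePath);
    const pathHash = crypto.createHash('md5').update(normalizedPath).digest('hex');

    if (!strict) {
        // Legacy naming: only the codebase path contributes, so every provider shares it.
        return `${hybrid ? 'hybrid_code_chunks' : 'code_chunks'}_${pathHash.substring(0, 8)}`;
    }

    // Strict naming: provider + sanitized model + a hash over provider, model, and path.
    const safeModel = model.replace(/[^a-zA-Z0-9]/g, '_');
    const uniqueHash = crypto.createHash('md5')
        .update(`${provider.toLowerCase()}_${safeModel}_${normalizedPath}`)
        .digest('hex');
    return `${hybrid ? 'hybrid' : 'code'}_${provider.toLowerCase()}_${safeModel}_${pathHash.substring(0, 8)}_${uniqueHash.substring(0, 8)}`;
}

// e.g. "hybrid_ollama_nomic_embed_text_<path_hash>_<unique_hash>"
console.log(collectionName('/path/to/repo', 'Ollama', 'nomic-embed-text', true));
```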