diff --git a/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts b/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts index ecde76915154..95b0c90d3769 100644 --- a/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts +++ b/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts @@ -1082,4 +1082,186 @@ describe("OpenAICompatibleEmbedder", () => { expect(result.error).toBe("embeddings:validation.configurationError") }) }) + + describe("DeepInfra provider detection and handling", () => { + it("should detect DeepInfra URLs with deepinfra.com domain", () => { + const embedder = new OpenAICompatibleEmbedder( + "https://api.deepinfra.com/v1/openai", + testApiKey, + "Qwen/Qwen3-Embedding-0.6B", + ) + + // Check the provider type is correctly detected + expect(embedder["providerType"]).toBe("deepinfra") + }) + + it("should detect DeepInfra URLs with deepinfra.ai domain", () => { + const embedder = new OpenAICompatibleEmbedder( + "https://api.deepinfra.ai/v1/openai", + testApiKey, + "Qwen/Qwen3-Embedding-0.6B", + ) + + // Check the provider type is correctly detected + expect(embedder["providerType"]).toBe("deepinfra") + }) + + it("should detect standard providers for non-DeepInfra URLs", () => { + const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId) + + // Check the provider type is correctly detected + expect(embedder["providerType"]).toBe("standard") + }) + + it("should send float encoding format for DeepInfra", async () => { + const embedder = new OpenAICompatibleEmbedder( + "https://api.deepinfra.com/v1/openai", + testApiKey, + "Qwen/Qwen3-Embedding-0.6B", + ) + + // Mock response with float array + const mockResponse = { + data: [{ embedding: [0.1, 0.2, 0.3] }], + usage: { prompt_tokens: 10, total_tokens: 15 }, + } + mockEmbeddingsCreate.mockResolvedValue(mockResponse) + + await embedder.createEmbeddings(["test text"]) + + // Verify that 'float' encoding 
format was used + expect(mockEmbeddingsCreate).toHaveBeenCalledWith({ + input: ["test text"], + model: "Qwen/Qwen3-Embedding-0.6B", + encoding_format: "float", + }) + }) + + it("should send base64 encoding format for standard providers", async () => { + const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId) + + // Mock response with base64 string + const testEmbedding = new Float32Array([0.1, 0.2, 0.3]) + const base64String = Buffer.from(testEmbedding.buffer).toString("base64") + const mockResponse = { + data: [{ embedding: base64String }], + usage: { prompt_tokens: 10, total_tokens: 15 }, + } + mockEmbeddingsCreate.mockResolvedValue(mockResponse) + + await embedder.createEmbeddings(["test text"]) + + // Verify that 'base64' encoding format was used + expect(mockEmbeddingsCreate).toHaveBeenCalledWith({ + input: ["test text"], + model: testModelId, + encoding_format: "base64", + }) + }) + + it("should handle float array responses from DeepInfra", async () => { + const embedder = new OpenAICompatibleEmbedder( + "https://api.deepinfra.com/v1/openai", + testApiKey, + "Qwen/Qwen3-Embedding-0.6B", + ) + + // Mock response with float array (DeepInfra format) + const mockResponse = { + data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }], + usage: { prompt_tokens: 20, total_tokens: 25 }, + } + mockEmbeddingsCreate.mockResolvedValue(mockResponse) + + const result = await embedder.createEmbeddings(["text1", "text2"]) + + // Verify the embeddings are correctly processed + expect(result.embeddings).toEqual([ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ]) + expect(result.usage).toEqual({ + promptTokens: 20, + totalTokens: 25, + }) + }) + + it("should handle base64 responses from standard providers", async () => { + const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId) + + // Create base64 encoded embeddings + const embedding1 = new Float32Array([0.1, 0.2, 0.3]) + const 
embedding2 = new Float32Array([0.4, 0.5, 0.6]) + const base64String1 = Buffer.from(embedding1.buffer).toString("base64") + const base64String2 = Buffer.from(embedding2.buffer).toString("base64") + + const mockResponse = { + data: [{ embedding: base64String1 }, { embedding: base64String2 }], + usage: { prompt_tokens: 20, total_tokens: 25 }, + } + mockEmbeddingsCreate.mockResolvedValue(mockResponse) + + const result = await embedder.createEmbeddings(["text1", "text2"]) + + // Verify the embeddings are correctly decoded from base64 + expect(result.embeddings[0][0]).toBeCloseTo(0.1, 5) + expect(result.embeddings[0][1]).toBeCloseTo(0.2, 5) + expect(result.embeddings[0][2]).toBeCloseTo(0.3, 5) + expect(result.embeddings[1][0]).toBeCloseTo(0.4, 5) + expect(result.embeddings[1][1]).toBeCloseTo(0.5, 5) + expect(result.embeddings[1][2]).toBeCloseTo(0.6, 5) + expect(result.usage).toEqual({ + promptTokens: 20, + totalTokens: 25, + }) + }) + + it("should validate DeepInfra configuration with float format", async () => { + const embedder = new OpenAICompatibleEmbedder( + "https://api.deepinfra.com/v1/openai", + testApiKey, + "Qwen/Qwen3-Embedding-0.6B", + ) + + const mockResponse = { + data: [{ embedding: [0.1, 0.2, 0.3] }], + usage: { prompt_tokens: 2, total_tokens: 2 }, + } + mockEmbeddingsCreate.mockResolvedValue(mockResponse) + + const result = await embedder.validateConfiguration() + + expect(result.valid).toBe(true) + expect(result.error).toBeUndefined() + expect(mockEmbeddingsCreate).toHaveBeenCalledWith({ + input: ["test"], + model: "Qwen/Qwen3-Embedding-0.6B", + encoding_format: "float", + }) + }) + + it("should use float format for DeepInfra with full endpoint URLs", async () => { + const fullUrl = "https://api.deepinfra.com/v1/openai/embeddings" + const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, "Qwen/Qwen3-Embedding-0.6B") + + global.fetch = vitest.fn().mockResolvedValueOnce({ + ok: true, + status: 200, + json: async () => ({ + data: [{ embedding: 
[0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				}),
+			} as any)
+
+			await embedder.createEmbeddings(["test"])
+
+			// Verify the request body contains float encoding format
+			expect(global.fetch).toHaveBeenCalledWith(
+				fullUrl,
+				expect.objectContaining({
+					body: expect.stringContaining('"encoding_format":"float"'),
+				}),
+			)
+		})
+	})
 })
diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts
index 6eaf2b6c2c16..cc217e104034 100644
--- a/src/services/code-index/embedders/openai-compatible.ts
+++ b/src/services/code-index/embedders/openai-compatible.ts
@@ -39,6 +39,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 	private readonly apiKey: string
 	private readonly isFullUrl: boolean
 	private readonly maxItemTokens: number
+	private readonly providerType: "deepinfra" | "standard"
 
 	// Global rate limiting state shared across all instances
 	private static globalRateLimitState = {
@@ -82,9 +83,25 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 		this.defaultModelId = modelId || getDefaultModelId("openai-compatible")
 		// Cache the URL type check for performance
 		this.isFullUrl = this.isFullEndpointUrl(baseUrl)
+		// Cache the provider type detection for performance
+		this.providerType = this.detectProviderType(baseUrl)
 		this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
 	}
 
+	/**
+	 * Detects the provider type from the host name of the API URL.
+	 * DeepInfra requires 'float' encoding format while others use 'base64'.
+	 * @param url The API URL to analyze
+	 * @returns 'deepinfra' when the host is deepinfra.com, deepinfra.ai or a subdomain of either; 'standard' otherwise
+	 */
+	private detectProviderType(url: string): "deepinfra" | "standard" {
+		// Match on the host only so a path/query mention or a lookalike host (notdeepinfra.com) is not detected
+		const host = /^(?:[a-z][a-z\d+.-]*:\/\/)?(?:[^\/?#@]*@)?([^\/?#:]*)/i.exec(url.trim())?.[1] ?? ""
+
+		const isDeepInfra = /(?:^|\.)deepinfra\.(?:com|ai)\.?$/i.test(host)
+		return isDeepInfra ? "deepinfra" : "standard"
+	}
+
 	/**
 	 * Creates embeddings for the given texts with batching and rate limiting
 	 * @param texts Array of text strings to embed
@@ -204,6 +221,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 		batchTexts: string[],
 		model: string,
 	): Promise {
+		// Use appropriate encoding format based on provider
+		const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"
+
 		const response = await fetch(url, {
 			method: "POST",
 			headers: {
@@ -216,7 +236,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 			body: JSON.stringify({
 				input: batchTexts,
 				model: model,
-				encoding_format: "base64",
+				encoding_format: encodingFormat,
 			}),
 		})
 
@@ -259,6 +279,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 	): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
 		// Use cached value for performance
 		const isFullUrl = this.isFullUrl
+		// Use appropriate encoding format based on provider
+		const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"
 
 		for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
 			// Check global rate limit before attempting request
@@ -272,19 +294,18 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 					response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model)
 				} else {
 					// Use OpenAI SDK for base URLs
+					// DeepInfra requires 'float' encoding, others use 'base64'
 					response = (await this.embeddingsClient.embeddings.create({
 						input: batchTexts,
 						model: model,
-						// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
-						// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
-						// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
- encoding_format: "base64", + encoding_format: encodingFormat as any, })) as OpenAIEmbeddingResponse } - // Convert base64 embeddings to float32 arrays + // Process embeddings based on response format const processedEmbeddings = response.data.map((item: EmbeddingItem) => { if (typeof item.embedding === "string") { + // Base64 encoded response (standard OpenAI-compatible) const buffer = Buffer.from(item.embedding, "base64") // Create Float32Array view over the buffer @@ -294,7 +315,26 @@ export class OpenAICompatibleEmbedder implements IEmbedder { ...item, embedding: Array.from(float32Array), } + } else if (Array.isArray(item.embedding)) { + // Float array response (DeepInfra) + // Ensure all values are valid numbers + const cleanedEmbedding = item.embedding.map((v: any) => { + const num = typeof v === "number" ? v : Number(v) + if (!isFinite(num)) { + console.error( + `[OpenAICompatibleEmbedder] WARNING: Invalid embedding value detected: ${v}`, + ) + return 0 // Replace invalid values with 0 + } + return num + }) + return { + ...item, + embedding: cleanedEmbedding, + } } + // Fallback for unexpected formats + console.error(`[OpenAICompatibleEmbedder] Unexpected embedding format: ${typeof item.embedding}`) return item }) @@ -366,6 +406,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder { // Test with a minimal embedding request const testTexts = ["test"] const modelToUse = this.defaultModelId + // Use appropriate encoding format based on provider + const encodingFormat = this.providerType === "deepinfra" ? 
"float" : "base64" let response: OpenAIEmbeddingResponse @@ -377,7 +419,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { response = (await this.embeddingsClient.embeddings.create({ input: testTexts, model: modelToUse, - encoding_format: "base64", + encoding_format: encodingFormat as any, })) as OpenAIEmbeddingResponse } diff --git a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts index ab7b15783e3a..11203f5e05e8 100644 --- a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts +++ b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts @@ -50,7 +50,7 @@ describe("QdrantVectorStore", () => { const mockWorkspacePath = "/test/workspace" const mockQdrantUrl = "http://mock-qdrant:6333" const mockApiKey = "test-api-key" - const mockVectorSize = 1536 + const mockVectorSize = 3 const mockHashedPath = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" // Needs to be long enough const expectedCollectionName = `ws-${mockHashedPath.substring(0, 16)}` diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index ba62afc5f81f..12dcd46b2874 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -18,6 +18,7 @@ export class QdrantVectorStore implements IVectorStore { private readonly collectionName: string private readonly qdrantUrl: string = "http://localhost:6333" private readonly workspacePath: string + private vectorName?: string // Track if collection uses named vectors /** * Creates a new Qdrant vector store @@ -153,6 +154,7 @@ export class QdrantVectorStore implements IVectorStore { if (collectionInfo === null) { // Collection info not retrieved (assume not found or inaccessible), create it + // Using unnamed vector configuration (default vector) await this.client.createCollection(this.collectionName, { vectors: { 
size: this.vectorSize, @@ -167,21 +169,53 @@ export class QdrantVectorStore implements IVectorStore { }) created = true } else { - // Collection exists, check vector size + // Collection exists, check vector configuration const vectorsConfig = collectionInfo.config?.params?.vectors - let existingVectorSize: number - - if (typeof vectorsConfig === "number") { + let existingVectorSize: number = 0 // Initialize with default value + let hasNamedVectors = false + let vectorNames: string[] = [] + + // Check if collection uses named vectors + if (typeof vectorsConfig === "object" && vectorsConfig && !Array.isArray(vectorsConfig)) { + // Check if this is a named vector configuration + const vectorKeys = Object.keys(vectorsConfig) + if (vectorKeys.length > 0 && !("size" in vectorsConfig)) { + // This is a named vector configuration + hasNamedVectors = true + vectorNames = vectorKeys + console.log(`[QdrantVectorStore] Collection uses named vectors: ${vectorNames.join(", ")}`) + + // Use the first vector name (or look for a specific one) + this.vectorName = vectorNames[0] + + // Get the size from the named vector + const namedVectorConfig = (vectorsConfig as any)[this.vectorName] + if (namedVectorConfig && typeof namedVectorConfig === "object" && "size" in namedVectorConfig) { + existingVectorSize = namedVectorConfig.size + console.log( + `[QdrantVectorStore] Using named vector '${this.vectorName}' with size ${existingVectorSize}`, + ) + } else { + console.warn( + `[QdrantVectorStore] Could not determine size for named vector '${this.vectorName}'`, + ) + existingVectorSize = 0 + } + } else if ("size" in vectorsConfig) { + // This is an unnamed vector with object configuration + existingVectorSize = (vectorsConfig as any).size + this.vectorName = undefined + } else { + // Unknown object configuration + existingVectorSize = 0 + } + } else if (typeof vectorsConfig === "number") { existingVectorSize = vectorsConfig - } else if ( - vectorsConfig && - typeof vectorsConfig === 
"object" && - "size" in vectorsConfig && - typeof vectorsConfig.size === "number" - ) { - existingVectorSize = vectorsConfig.size + this.vectorName = undefined } else { - existingVectorSize = 0 // Fallback for unknown configuration + // Fallback for unknown configuration + existingVectorSize = 0 + this.vectorName = undefined } if (existingVectorSize === this.vectorSize) { @@ -344,6 +378,9 @@ export class QdrantVectorStore implements IVectorStore { ): Promise { try { const processedPoints = points.map((point) => { + // Handle named vs unnamed vectors + const vector = this.vectorName ? { [this.vectorName]: point.vector } : point.vector + if (point.payload?.filePath) { const segments = point.payload.filePath.split(path.sep).filter(Boolean) const pathSegments = segments.reduce( @@ -354,14 +391,19 @@ export class QdrantVectorStore implements IVectorStore { {}, ) return { - ...point, + id: point.id, + vector: vector, payload: { ...point.payload, pathSegments, }, } } - return point + return { + id: point.id, + vector: vector, + payload: point.payload, + } }) await this.client.upsert(this.collectionName, { @@ -403,6 +445,13 @@ export class QdrantVectorStore implements IVectorStore { maxResults?: number, ): Promise { try { + // Validate query vector dimension + if (queryVector.length !== this.vectorSize) { + const errorMsg = `[QdrantVectorStore] Query vector dimension mismatch. Expected ${this.vectorSize}, got ${queryVector.length}. This usually happens when switching between embedding models with different dimensions. Please reindex your codebase.` + console.error(errorMsg) + throw new Error(errorMsg) + } + let filter: | { must: Array<{ key: string; match: { value: string } }> @@ -443,8 +492,8 @@ export class QdrantVectorStore implements IVectorStore { ? 
{ ...filter, must_not: [...(filter.must_not || []), ...metadataExclusion.must_not] } : metadataExclusion - const searchRequest = { - query: queryVector, + // Build search request based on whether collection uses named vectors + const searchRequest: any = { filter: mergedFilter, score_threshold: minScore ?? DEFAULT_SEARCH_MIN_SCORE, limit: maxResults ?? DEFAULT_MAX_SEARCH_RESULTS, @@ -457,12 +506,38 @@ export class QdrantVectorStore implements IVectorStore { }, } + // Add query vector with appropriate format + if (this.vectorName) { + // Use named vector format + searchRequest.query = { + [this.vectorName]: queryVector, + } + } else { + // Use unnamed vector format + searchRequest.query = queryVector + } + + // Validate vector for invalid values + const hasInvalidValues = queryVector.some((v) => !isFinite(v)) + if (hasInvalidValues) { + console.error(`[QdrantVectorStore] WARNING: Query vector contains invalid values (NaN or Infinity)`) + const invalidIndices = queryVector.map((v, i) => (!isFinite(v) ? i : -1)).filter((i) => i >= 0) + console.error( + `[QdrantVectorStore] Invalid value indices: ${invalidIndices.slice(0, 10).join(", ")}${invalidIndices.length > 10 ? "..." 
: ""}`, + ) + } + const operationResult = await this.client.query(this.collectionName, searchRequest) const filteredPoints = operationResult.points.filter((p) => this.isPayloadValid(p.payload)) return filteredPoints as VectorStoreSearchResult[] - } catch (error) { + } catch (error: any) { console.error("Failed to search points:", error) + + // Extract error details for better debugging + if (error?.status === 400 && error.data) { + console.error("[QdrantVectorStore] Qdrant error details:", JSON.stringify(error.data)) + } throw error } } diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index 14015c80dbf7..986069d8de66 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -52,6 +52,14 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { scoreThreshold: 0.15, queryPrefix: "Represent this query for searching relevant code: ", }, + // DeepInfra models + "Qwen/Qwen3-Embedding-0.6B": { dimension: 1024, scoreThreshold: 0.4 }, + "Qwen/Qwen3-Embedding-4B": { dimension: 2560, scoreThreshold: 0.4 }, + "Qwen/Qwen3-Embedding-8B": { dimension: 4096, scoreThreshold: 0.4 }, + "intfloat/multilingual-e5-large-instruct": { dimension: 1024, scoreThreshold: 0.4 }, + "google/embeddinggemma-300m": { dimension: 768, scoreThreshold: 0.4 }, + "BAAI/bge-m3": { dimension: 1024, scoreThreshold: 0.4 }, + "BAAI/bge-large-en-v1.5": { dimension: 1024, scoreThreshold: 0.4 }, }, gemini: { "text-embedding-004": { dimension: 768 },