7 changes: 6 additions & 1 deletion src/services/code-index/constants/index.ts
@@ -24,7 +24,12 @@ export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate

/**OpenAI Embedder */
export const MAX_BATCH_TOKENS = 100000
export const MAX_ITEM_TOKENS = 8191
// NOTE: Conservative limit of 511 tokens (one below the common 512-token cap)
// to ensure compatibility with most embedding models.
// Many models (e.g. e5-large, bge-large-en-v1.5) support at most 512 tokens.
// TODO: Consider implementing model-specific token limits in embeddingModels.ts
// (a sketch follows this file's diff); some models support higher limits,
// e.g. Qwen3-Embedding (8192) and embeddinggemma (2048).
export const MAX_ITEM_TOKENS = 511
export const BATCH_PROCESSING_CONCURRENCY = 10

/**Gemini Embedder */
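The TODO above suggests model-specific token limits. A minimal sketch of that idea, assuming a lookup table in embeddingModels.ts; the table and the helper getMaxItemTokens() are illustrative and not part of this PR, and the per-model limits are the ones named in the comment:

const MODEL_MAX_ITEM_TOKENS: Record<string, number> = {
	"Qwen/Qwen3-Embedding-0.6B": 8192,
	"embeddinggemma": 2048,
	"e5-large": 512,
	"bge-large-en-v1.5": 512,
}

// Falls back to the conservative 511-token default for unknown models.
export function getMaxItemTokens(modelId?: string): number {
	if (!modelId) return MAX_ITEM_TOKENS
	return MODEL_MAX_ITEM_TOKENS[modelId] ?? MAX_ITEM_TOKENS
}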
@@ -1082,4 +1082,186 @@ describe("OpenAICompatibleEmbedder", () => {
expect(result.error).toBe("embeddings:validation.configurationError")
})
})

describe("DeepInfra provider detection and handling", () => {
it("should detect DeepInfra URLs with deepinfra.com domain", () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("deepinfra")
})

it("should detect DeepInfra URLs with deepinfra.ai domain", () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.ai/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("deepinfra")
})

it("should detect standard providers for non-DeepInfra URLs", () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("standard")
})

it("should send float encoding format for DeepInfra", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Mock response with float array
const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

await embedder.createEmbeddings(["test text"])

// Verify that 'float' encoding format was used
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test text"],
model: "Qwen/Qwen3-Embedding-0.6B",
encoding_format: "float",
})
})

it("should send base64 encoding format for standard providers", async () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Mock response with base64 string
const testEmbedding = new Float32Array([0.1, 0.2, 0.3])
const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
const mockResponse = {
data: [{ embedding: base64String }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

await embedder.createEmbeddings(["test text"])

// Verify that 'base64' encoding format was used
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test text"],
model: testModelId,
encoding_format: "base64",
})
})

it("should handle float array responses from DeepInfra", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Mock response with float array (DeepInfra format)
const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
usage: { prompt_tokens: 20, total_tokens: 25 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.createEmbeddings(["text1", "text2"])

// Verify the embeddings are correctly processed
expect(result.embeddings).toEqual([
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
])
expect(result.usage).toEqual({
promptTokens: 20,
totalTokens: 25,
})
})

it("should handle base64 responses from standard providers", async () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Create base64 encoded embeddings
const embedding1 = new Float32Array([0.1, 0.2, 0.3])
const embedding2 = new Float32Array([0.4, 0.5, 0.6])
const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
const base64String2 = Buffer.from(embedding2.buffer).toString("base64")

const mockResponse = {
data: [{ embedding: base64String1 }, { embedding: base64String2 }],
usage: { prompt_tokens: 20, total_tokens: 25 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.createEmbeddings(["text1", "text2"])

// Verify the embeddings are correctly decoded from base64
expect(result.embeddings[0][0]).toBeCloseTo(0.1, 5)
expect(result.embeddings[0][1]).toBeCloseTo(0.2, 5)
expect(result.embeddings[0][2]).toBeCloseTo(0.3, 5)
expect(result.embeddings[1][0]).toBeCloseTo(0.4, 5)
expect(result.embeddings[1][1]).toBeCloseTo(0.5, 5)
expect(result.embeddings[1][2]).toBeCloseTo(0.6, 5)
expect(result.usage).toEqual({
promptTokens: 20,
totalTokens: 25,
})
})

it("should validate DeepInfra configuration with float format", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 2, total_tokens: 2 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.validateConfiguration()

expect(result.valid).toBe(true)
expect(result.error).toBeUndefined()
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test"],
model: "Qwen/Qwen3-Embedding-0.6B",
encoding_format: "float",
})
})

it("should use float format for DeepInfra with full endpoint URLs", async () => {
const fullUrl = "https://api.deepinfra.com/v1/openai/embeddings"
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, "Qwen/Qwen3-Embedding-0.6B")

global.fetch = vitest.fn().mockResolvedValueOnce({
ok: true,
status: 200,
json: async () => ({
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}),
} as any)

await embedder.createEmbeddings(["test"])

// Verify the request body contains float encoding format
expect(global.fetch).toHaveBeenCalledWith(
fullUrl,
expect.objectContaining({
body: expect.stringContaining('"encoding_format":"float"'),
}),
)
})
})
})
Review comment:
The query prefix functionality added in this PR (lines 114-137) lacks test coverage. While DeepInfra provider detection and encoding format are tested, there are no tests verifying that getModelQueryPrefix() prefixes are actually being applied to queries. Consider adding tests that verify: (1) prefixes are correctly added for models that require them, (2) double-prefixing is prevented, and (3) texts that would exceed MAX_ITEM_TOKENS after prefixing are handled appropriately.
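A sketch of the first two suggested tests, reusing this spec's existing mocks. It assumes getModelQueryPrefix() returns "query: " for the model used here; the prefix value is illustrative, and case (3) would additionally need an input sized near the 511-token limit:

it("should apply the model's query prefix to inputs", async () => {
	const embedder = new OpenAICompatibleEmbedder(
		"https://api.deepinfra.com/v1/openai",
		testApiKey,
		"Qwen/Qwen3-Embedding-0.6B",
	)
	mockEmbeddingsCreate.mockResolvedValue({
		data: [{ embedding: [0.1, 0.2, 0.3] }],
		usage: { prompt_tokens: 10, total_tokens: 15 },
	})

	await embedder.createEmbeddings(["some text"])

	// Assumes the prefix is applied inside createEmbeddings
	expect(mockEmbeddingsCreate).toHaveBeenCalledWith(
		expect.objectContaining({ input: ["query: some text"] }),
	)
})

it("should not double-prefix texts that already start with the prefix", async () => {
	const embedder = new OpenAICompatibleEmbedder(
		"https://api.deepinfra.com/v1/openai",
		testApiKey,
		"Qwen/Qwen3-Embedding-0.6B",
	)
	mockEmbeddingsCreate.mockResolvedValue({
		data: [{ embedding: [0.1, 0.2, 0.3] }],
		usage: { prompt_tokens: 10, total_tokens: 15 },
	})

	await embedder.createEmbeddings(["query: some text"])

	expect(mockEmbeddingsCreate).toHaveBeenCalledWith(
		expect.objectContaining({ input: ["query: some text"] }),
	)
})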

56 changes: 49 additions & 7 deletions src/services/code-index/embedders/openai-compatible.ts
@@ -39,6 +39,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
private readonly apiKey: string
private readonly isFullUrl: boolean
private readonly maxItemTokens: number
private readonly providerType: "deepinfra" | "standard"

// Global rate limiting state shared across all instances
private static globalRateLimitState = {
@@ -82,9 +83,25 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
this.defaultModelId = modelId || getDefaultModelId("openai-compatible")
// Cache the URL type check for performance
this.isFullUrl = this.isFullEndpointUrl(baseUrl)
// Cache the provider type detection for performance
this.providerType = this.detectProviderType(baseUrl)
this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
}

/**
* Detects the provider type based on the URL pattern.
* DeepInfra requires 'float' encoding format while others use 'base64'.
* @param url The API URL to analyze
* @returns 'deepinfra' for DeepInfra endpoints, 'standard' for others
*/
private detectProviderType(url: string): "deepinfra" | "standard" {
// DeepInfra URLs contain 'deepinfra.com' or 'deepinfra.ai'
const deepInfraPatterns = [/deepinfra\.com/i, /deepinfra\.ai/i]

const isDeepInfra = deepInfraPatterns.some((pattern) => pattern.test(url))
return isDeepInfra ? "deepinfra" : "standard"
}

/**
* Creates embeddings for the given texts with batching and rate limiting
* @param texts Array of text strings to embed
@@ -204,6 +221,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
batchTexts: string[],
model: string,
): Promise<OpenAIEmbeddingResponse> {
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

const response = await fetch(url, {
method: "POST",
headers: {
@@ -216,7 +236,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
body: JSON.stringify({
input: batchTexts,
model: model,
encoding_format: "base64",
encoding_format: encodingFormat,
}),
})

@@ -259,6 +279,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
// Use cached value for performance
const isFullUrl = this.isFullUrl
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
// Check global rate limit before attempting request
@@ -272,19 +294,18 @@
response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model)
} else {
// Use OpenAI SDK for base URLs
// DeepInfra requires 'float' encoding, others use 'base64'
response = (await this.embeddingsClient.embeddings.create({
input: batchTexts,
model: model,
// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
encoding_format: "base64",
encoding_format: encodingFormat as any,
})) as OpenAIEmbeddingResponse
}

// Convert base64 embeddings to float32 arrays
// Process embeddings based on response format
const processedEmbeddings = response.data.map((item: EmbeddingItem) => {
if (typeof item.embedding === "string") {
// Base64 encoded response (standard OpenAI-compatible)
const buffer = Buffer.from(item.embedding, "base64")

// Create Float32Array view over the buffer
@@ -294,7 +315,26 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
...item,
embedding: Array.from(float32Array),
}
} else if (Array.isArray(item.embedding)) {
// Float array response (DeepInfra)
// Ensure all values are valid numbers
const cleanedEmbedding = item.embedding.map((v: any) => {
const num = typeof v === "number" ? v : Number(v)
if (!isFinite(num)) {
console.error(
`[OpenAICompatibleEmbedder] WARNING: Invalid embedding value detected: ${v}`,
)
return 0 // Replace invalid values with 0
}
return num
})
return {
...item,
embedding: cleanedEmbedding,
}
}
// Fallback for unexpected formats
console.error(`[OpenAICompatibleEmbedder] Unexpected embedding format: ${typeof item.embedding}`)
return item
})

@@ -366,6 +406,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
// Test with a minimal embedding request
const testTexts = ["test"]
const modelToUse = this.defaultModelId
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

let response: OpenAIEmbeddingResponse

@@ -377,7 +419,7 @@
response = (await this.embeddingsClient.embeddings.create({
input: testTexts,
model: modelToUse,
encoding_format: "base64",
encoding_format: encodingFormat as any,
})) as OpenAIEmbeddingResponse
}

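For reference, a self-contained sketch of the base64 decoding path this embedder relies on (function name illustrative). Embeddings requested with encoding_format "base64" arrive as a base64 string whose bytes are little-endian float32 values; viewing those bytes through a Float32Array recovers the full-dimension vector while bypassing the SDK parsing issue noted in the diff above.

function decodeBase64Embedding(encoded: string): number[] {
	const buffer = Buffer.from(encoded, "base64")
	// View the buffer's bytes as float32 values (4 bytes each); byteOffset
	// matters because a Node Buffer may be a view into a larger ArrayBuffer.
	const floats = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4)
	return Array.from(floats)
}

// Round trip: encode three floats, then decode them back.
const original = new Float32Array([0.1, 0.2, 0.3])
const encoded = Buffer.from(original.buffer).toString("base64")
console.log(decodeBase64Embedding(encoded)) // ≈ [0.1, 0.2, 0.3]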