Skip to content

Commit e1d4f7b

Browse files
committed
feat: add DeepInfra embedding support and fix missing Qdrant type field index
Added support for DeepInfra-hosted embedding models and fixed a critical bug where the 'type' field index was missing in Qdrant, causing "Bad Request" errors during code search operations. Changes: - Added DeepInfra provider detection in OpenAICompatibleEmbedder * Detect DeepInfra URLs (deepinfra.com) * Use 'float' encoding format for DeepInfra, 'base64' for other standard providers * Handle both float array and base64 string embedding responses * Added validation for embedding values (NaN/Infinity checking) - Fixed missing Qdrant payload index for 'type' field * A non-existent `type` field index caused "Bad Request" during `codebase_search` tool invocation * Create keyword index for 'type' field to support metadata filtering * Resolves "Index required but not found for 'type' field" error - Added 7 DeepInfra embedding model profiles: * Qwen/Qwen3-Embedding-0.6B (1024 dims) * Qwen/Qwen3-Embedding-4B (2560 dims) * Qwen/Qwen3-Embedding-8B (4096 dims) * intfloat/multilingual-e5-large-instruct (1024 dims) * google/embeddinggemma-300m (768 dims) * BAAI/bge-m3 (1024 dims) * BAAI/bge-large-en-v1.5 (1024 dims) - Added some test coverage for DeepInfra * Provider validation * Encoding format tests * Float array and base64 response handling tests * Configuration validation tests Tested with: embeddinggemma-300m, text-embedding-004, multilingual-e5-large
1 parent 2e32a59 commit e1d4f7b

File tree

4 files changed

+348
-24
lines changed

4 files changed

+348
-24
lines changed

src/services/code-index/embedders/__tests__/openai-compatible.spec.ts

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,4 +1082,186 @@ describe("OpenAICompatibleEmbedder", () => {
10821082
expect(result.error).toBe("embeddings:validation.configurationError")
10831083
})
10841084
})
1085+
1086+
describe("DeepInfra provider detection and handling", () => {
1087+
it("should detect DeepInfra URLs with deepinfra.com domain", () => {
1088+
const embedder = new OpenAICompatibleEmbedder(
1089+
"https://api.deepinfra.com/v1/openai",
1090+
testApiKey,
1091+
"Qwen/Qwen3-Embedding-0.6B",
1092+
)
1093+
1094+
// Check the provider type is correctly detected
1095+
expect(embedder["providerType"]).toBe("deepinfra")
1096+
})
1097+
1098+
it("should detect DeepInfra URLs with deepinfra.ai domain", () => {
1099+
const embedder = new OpenAICompatibleEmbedder(
1100+
"https://api.deepinfra.ai/v1/openai",
1101+
testApiKey,
1102+
"Qwen/Qwen3-Embedding-0.6B",
1103+
)
1104+
1105+
// Check the provider type is correctly detected
1106+
expect(embedder["providerType"]).toBe("deepinfra")
1107+
})
1108+
1109+
it("should detect standard providers for non-DeepInfra URLs", () => {
1110+
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)
1111+
1112+
// Check the provider type is correctly detected
1113+
expect(embedder["providerType"]).toBe("standard")
1114+
})
1115+
1116+
it("should send float encoding format for DeepInfra", async () => {
1117+
const embedder = new OpenAICompatibleEmbedder(
1118+
"https://api.deepinfra.com/v1/openai",
1119+
testApiKey,
1120+
"Qwen/Qwen3-Embedding-0.6B",
1121+
)
1122+
1123+
// Mock response with float array
1124+
const mockResponse = {
1125+
data: [{ embedding: [0.1, 0.2, 0.3] }],
1126+
usage: { prompt_tokens: 10, total_tokens: 15 },
1127+
}
1128+
mockEmbeddingsCreate.mockResolvedValue(mockResponse)
1129+
1130+
await embedder.createEmbeddings(["test text"])
1131+
1132+
// Verify that 'float' encoding format was used
1133+
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
1134+
input: ["test text"],
1135+
model: "Qwen/Qwen3-Embedding-0.6B",
1136+
encoding_format: "float",
1137+
})
1138+
})
1139+
1140+
it("should send base64 encoding format for standard providers", async () => {
1141+
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)
1142+
1143+
// Mock response with base64 string
1144+
const testEmbedding = new Float32Array([0.1, 0.2, 0.3])
1145+
const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
1146+
const mockResponse = {
1147+
data: [{ embedding: base64String }],
1148+
usage: { prompt_tokens: 10, total_tokens: 15 },
1149+
}
1150+
mockEmbeddingsCreate.mockResolvedValue(mockResponse)
1151+
1152+
await embedder.createEmbeddings(["test text"])
1153+
1154+
// Verify that 'base64' encoding format was used
1155+
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
1156+
input: ["test text"],
1157+
model: testModelId,
1158+
encoding_format: "base64",
1159+
})
1160+
})
1161+
1162+
it("should handle float array responses from DeepInfra", async () => {
1163+
const embedder = new OpenAICompatibleEmbedder(
1164+
"https://api.deepinfra.com/v1/openai",
1165+
testApiKey,
1166+
"Qwen/Qwen3-Embedding-0.6B",
1167+
)
1168+
1169+
// Mock response with float array (DeepInfra format)
1170+
const mockResponse = {
1171+
data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
1172+
usage: { prompt_tokens: 20, total_tokens: 25 },
1173+
}
1174+
mockEmbeddingsCreate.mockResolvedValue(mockResponse)
1175+
1176+
const result = await embedder.createEmbeddings(["text1", "text2"])
1177+
1178+
// Verify the embeddings are correctly processed
1179+
expect(result.embeddings).toEqual([
1180+
[0.1, 0.2, 0.3],
1181+
[0.4, 0.5, 0.6],
1182+
])
1183+
expect(result.usage).toEqual({
1184+
promptTokens: 20,
1185+
totalTokens: 25,
1186+
})
1187+
})
1188+
1189+
it("should handle base64 responses from standard providers", async () => {
1190+
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)
1191+
1192+
// Create base64 encoded embeddings
1193+
const embedding1 = new Float32Array([0.1, 0.2, 0.3])
1194+
const embedding2 = new Float32Array([0.4, 0.5, 0.6])
1195+
const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
1196+
const base64String2 = Buffer.from(embedding2.buffer).toString("base64")
1197+
1198+
const mockResponse = {
1199+
data: [{ embedding: base64String1 }, { embedding: base64String2 }],
1200+
usage: { prompt_tokens: 20, total_tokens: 25 },
1201+
}
1202+
mockEmbeddingsCreate.mockResolvedValue(mockResponse)
1203+
1204+
const result = await embedder.createEmbeddings(["text1", "text2"])
1205+
1206+
// Verify the embeddings are correctly decoded from base64
1207+
expect(result.embeddings[0][0]).toBeCloseTo(0.1, 5)
1208+
expect(result.embeddings[0][1]).toBeCloseTo(0.2, 5)
1209+
expect(result.embeddings[0][2]).toBeCloseTo(0.3, 5)
1210+
expect(result.embeddings[1][0]).toBeCloseTo(0.4, 5)
1211+
expect(result.embeddings[1][1]).toBeCloseTo(0.5, 5)
1212+
expect(result.embeddings[1][2]).toBeCloseTo(0.6, 5)
1213+
expect(result.usage).toEqual({
1214+
promptTokens: 20,
1215+
totalTokens: 25,
1216+
})
1217+
})
1218+
1219+
it("should validate DeepInfra configuration with float format", async () => {
1220+
const embedder = new OpenAICompatibleEmbedder(
1221+
"https://api.deepinfra.com/v1/openai",
1222+
testApiKey,
1223+
"Qwen/Qwen3-Embedding-0.6B",
1224+
)
1225+
1226+
const mockResponse = {
1227+
data: [{ embedding: [0.1, 0.2, 0.3] }],
1228+
usage: { prompt_tokens: 2, total_tokens: 2 },
1229+
}
1230+
mockEmbeddingsCreate.mockResolvedValue(mockResponse)
1231+
1232+
const result = await embedder.validateConfiguration()
1233+
1234+
expect(result.valid).toBe(true)
1235+
expect(result.error).toBeUndefined()
1236+
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
1237+
input: ["test"],
1238+
model: "Qwen/Qwen3-Embedding-0.6B",
1239+
encoding_format: "float",
1240+
})
1241+
})
1242+
1243+
it("should use float format for DeepInfra with full endpoint URLs", async () => {
1244+
const fullUrl = "https://api.deepinfra.com/v1/openai/embeddings"
1245+
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, "Qwen/Qwen3-Embedding-0.6B")
1246+
1247+
global.fetch = vitest.fn().mockResolvedValueOnce({
1248+
ok: true,
1249+
status: 200,
1250+
json: async () => ({
1251+
data: [{ embedding: [0.1, 0.2, 0.3] }],
1252+
usage: { prompt_tokens: 10, total_tokens: 15 },
1253+
}),
1254+
} as any)
1255+
1256+
await embedder.createEmbeddings(["test"])
1257+
1258+
// Verify the request body contains float encoding format
1259+
expect(global.fetch).toHaveBeenCalledWith(
1260+
fullUrl,
1261+
expect.objectContaining({
1262+
body: expect.stringContaining('"encoding_format":"float"'),
1263+
}),
1264+
)
1265+
})
1266+
})
10851267
})

src/services/code-index/embedders/openai-compatible.ts

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
3939
private readonly apiKey: string
4040
private readonly isFullUrl: boolean
4141
private readonly maxItemTokens: number
42+
private readonly providerType: "deepinfra" | "standard"
4243

4344
// Global rate limiting state shared across all instances
4445
private static globalRateLimitState = {
@@ -82,9 +83,25 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
8283
this.defaultModelId = modelId || getDefaultModelId("openai-compatible")
8384
// Cache the URL type check for performance
8485
this.isFullUrl = this.isFullEndpointUrl(baseUrl)
86+
// Cache the provider type detection for performance
87+
this.providerType = this.detectProviderType(baseUrl)
8588
this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
8689
}
8790

91+
/**
92+
* Detects the provider type based on the URL pattern.
93+
* DeepInfra requires 'float' encoding format while others use 'base64'.
94+
* @param url The API URL to analyze
95+
* @returns 'deepinfra' for DeepInfra endpoints, 'standard' for others
96+
*/
97+
private detectProviderType(url: string): "deepinfra" | "standard" {
98+
// DeepInfra URLs contain 'deepinfra.com' or 'deepinfra.ai'
99+
const deepInfraPatterns = [/deepinfra\.com/i, /deepinfra\.ai/i]
100+
101+
const isDeepInfra = deepInfraPatterns.some((pattern) => pattern.test(url))
102+
return isDeepInfra ? "deepinfra" : "standard"
103+
}
104+
88105
/**
89106
* Creates embeddings for the given texts with batching and rate limiting
90107
* @param texts Array of text strings to embed
@@ -204,6 +221,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
204221
batchTexts: string[],
205222
model: string,
206223
): Promise<OpenAIEmbeddingResponse> {
224+
// Use appropriate encoding format based on provider
225+
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"
226+
207227
const response = await fetch(url, {
208228
method: "POST",
209229
headers: {
@@ -216,7 +236,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
216236
body: JSON.stringify({
217237
input: batchTexts,
218238
model: model,
219-
encoding_format: "base64",
239+
encoding_format: encodingFormat,
220240
}),
221241
})
222242

@@ -259,6 +279,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
259279
): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
260280
// Use cached value for performance
261281
const isFullUrl = this.isFullUrl
282+
// Use appropriate encoding format based on provider
283+
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"
262284

263285
for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
264286
// Check global rate limit before attempting request
@@ -272,19 +294,18 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
272294
response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model)
273295
} else {
274296
// Use OpenAI SDK for base URLs
297+
// DeepInfra requires 'float' encoding, others use 'base64'
275298
response = (await this.embeddingsClient.embeddings.create({
276299
input: batchTexts,
277300
model: model,
278-
// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
279-
// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
280-
// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
281-
encoding_format: "base64",
301+
encoding_format: encodingFormat as any,
282302
})) as OpenAIEmbeddingResponse
283303
}
284304

285-
// Convert base64 embeddings to float32 arrays
305+
// Process embeddings based on response format
286306
const processedEmbeddings = response.data.map((item: EmbeddingItem) => {
287307
if (typeof item.embedding === "string") {
308+
// Base64 encoded response (standard OpenAI-compatible)
288309
const buffer = Buffer.from(item.embedding, "base64")
289310

290311
// Create Float32Array view over the buffer
@@ -294,7 +315,26 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
294315
...item,
295316
embedding: Array.from(float32Array),
296317
}
318+
} else if (Array.isArray(item.embedding)) {
319+
// Float array response (DeepInfra)
320+
// Ensure all values are valid numbers
321+
const cleanedEmbedding = item.embedding.map((v: any) => {
322+
const num = typeof v === "number" ? v : Number(v)
323+
if (!isFinite(num)) {
324+
console.error(
325+
`[OpenAICompatibleEmbedder] WARNING: Invalid embedding value detected: ${v}`,
326+
)
327+
return 0 // Replace invalid values with 0
328+
}
329+
return num
330+
})
331+
return {
332+
...item,
333+
embedding: cleanedEmbedding,
334+
}
297335
}
336+
// Fallback for unexpected formats
337+
console.error(`[OpenAICompatibleEmbedder] Unexpected embedding format: ${typeof item.embedding}`)
298338
return item
299339
})
300340

@@ -366,6 +406,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
366406
// Test with a minimal embedding request
367407
const testTexts = ["test"]
368408
const modelToUse = this.defaultModelId
409+
// Use appropriate encoding format based on provider
410+
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"
369411

370412
let response: OpenAIEmbeddingResponse
371413

@@ -377,7 +419,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
377419
response = (await this.embeddingsClient.embeddings.create({
378420
input: testTexts,
379421
model: modelToUse,
380-
encoding_format: "base64",
422+
encoding_format: encodingFormat as any,
381423
})) as OpenAIEmbeddingResponse
382424
}
383425

0 commit comments

Comments
 (0)