Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -1082,4 +1082,186 @@ describe("OpenAICompatibleEmbedder", () => {
expect(result.error).toBe("embeddings:validation.configurationError")
})
})

// Suite covering DeepInfra-specific behavior of OpenAICompatibleEmbedder:
// URL-based provider detection, the 'float' vs 'base64' encoding_format choice,
// and handling of both float-array and base64 embedding responses.
// Relies on module-level fixtures defined earlier in this file:
// testApiKey, testModelId, and the mockEmbeddingsCreate mock for the OpenAI SDK.
describe("DeepInfra provider detection and handling", () => {
it("should detect DeepInfra URLs with deepinfra.com domain", () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Check the provider type is correctly detected
// (bracket access reads the private `providerType` field for testing)
expect(embedder["providerType"]).toBe("deepinfra")
})

it("should detect DeepInfra URLs with deepinfra.ai domain", () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.ai/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("deepinfra")
})

it("should detect standard providers for non-DeepInfra URLs", () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("standard")
})

it("should send float encoding format for DeepInfra", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Mock response with float array (DeepInfra returns plain number arrays)
const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

await embedder.createEmbeddings(["test text"])

// Verify that 'float' encoding format was used
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test text"],
model: "Qwen/Qwen3-Embedding-0.6B",
encoding_format: "float",
})
})

it("should send base64 encoding format for standard providers", async () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Mock response with base64 string: base64 of the raw Float32Array bytes,
// mirroring what OpenAI-compatible providers return for encoding_format: "base64"
const testEmbedding = new Float32Array([0.1, 0.2, 0.3])
const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
const mockResponse = {
data: [{ embedding: base64String }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

await embedder.createEmbeddings(["test text"])

// Verify that 'base64' encoding format was used
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test text"],
model: testModelId,
encoding_format: "base64",
})
})

it("should handle float array responses from DeepInfra", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Mock response with float array (DeepInfra format)
const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
usage: { prompt_tokens: 20, total_tokens: 25 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.createEmbeddings(["text1", "text2"])

// Verify the embeddings are correctly processed (passed through unchanged)
expect(result.embeddings).toEqual([
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
])
// Usage keys are renamed from the wire format (prompt_tokens/total_tokens)
expect(result.usage).toEqual({
promptTokens: 20,
totalTokens: 25,
})
})

it("should handle base64 responses from standard providers", async () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Create base64 encoded embeddings (raw little-endian Float32Array bytes)
const embedding1 = new Float32Array([0.1, 0.2, 0.3])
const embedding2 = new Float32Array([0.4, 0.5, 0.6])
const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
const base64String2 = Buffer.from(embedding2.buffer).toString("base64")

const mockResponse = {
data: [{ embedding: base64String1 }, { embedding: base64String2 }],
usage: { prompt_tokens: 20, total_tokens: 25 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.createEmbeddings(["text1", "text2"])

// Verify the embeddings are correctly decoded from base64.
// toBeCloseTo because float32 -> float64 round-tripping loses precision.
expect(result.embeddings[0][0]).toBeCloseTo(0.1, 5)
expect(result.embeddings[0][1]).toBeCloseTo(0.2, 5)
expect(result.embeddings[0][2]).toBeCloseTo(0.3, 5)
expect(result.embeddings[1][0]).toBeCloseTo(0.4, 5)
expect(result.embeddings[1][1]).toBeCloseTo(0.5, 5)
expect(result.embeddings[1][2]).toBeCloseTo(0.6, 5)
expect(result.usage).toEqual({
promptTokens: 20,
totalTokens: 25,
})
})

it("should validate DeepInfra configuration with float format", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 2, total_tokens: 2 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.validateConfiguration()

expect(result.valid).toBe(true)
expect(result.error).toBeUndefined()
// validateConfiguration probes with a minimal ["test"] request and must
// also honor the provider-specific encoding format
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test"],
model: "Qwen/Qwen3-Embedding-0.6B",
encoding_format: "float",
})
})

it("should use float format for DeepInfra with full endpoint URLs", async () => {
// A URL ending in /embeddings takes the direct-fetch path instead of the SDK
const fullUrl = "https://api.deepinfra.com/v1/openai/embeddings"
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, "Qwen/Qwen3-Embedding-0.6B")

// NOTE(review): global.fetch is replaced here and never restored within this
// suite — could leak into later tests; consider vi.stubGlobal/afterEach restore.
global.fetch = vitest.fn().mockResolvedValueOnce({
ok: true,
status: 200,
json: async () => ({
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}),
} as any)

await embedder.createEmbeddings(["test"])

// Verify the request body contains float encoding format
expect(global.fetch).toHaveBeenCalledWith(
fullUrl,
expect.objectContaining({
body: expect.stringContaining('"encoding_format":"float"'),
}),
)
})
})
})
56 changes: 49 additions & 7 deletions src/services/code-index/embedders/openai-compatible.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
private readonly apiKey: string
private readonly isFullUrl: boolean
private readonly maxItemTokens: number
private readonly providerType: "deepinfra" | "standard"

// Global rate limiting state shared across all instances
private static globalRateLimitState = {
Expand Down Expand Up @@ -82,9 +83,25 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
this.defaultModelId = modelId || getDefaultModelId("openai-compatible")
// Cache the URL type check for performance
this.isFullUrl = this.isFullEndpointUrl(baseUrl)
// Cache the provider type detection for performance
this.providerType = this.detectProviderType(baseUrl)
this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
}

/**
* Detects the provider type based on the URL pattern.
* DeepInfra requires 'float' encoding format while others use 'base64'.
* @param url The API URL to analyze
* @returns 'deepinfra' for DeepInfra endpoints, 'standard' for others
*/
/**
 * Detects the provider type based on the URL.
 * DeepInfra requires 'float' encoding format while others use 'base64'.
 *
 * The hostname is parsed and matched exactly (or as a subdomain) rather than
 * using a bare substring test, so URLs such as "https://notdeepinfra.community"
 * or a non-DeepInfra host with "deepinfra.com" in its path are not
 * misclassified as DeepInfra.
 *
 * @param url The API URL to analyze
 * @returns 'deepinfra' for DeepInfra endpoints, 'standard' for others
 */
private detectProviderType(url: string): "deepinfra" | "standard" {
	try {
		const hostname = new URL(url).hostname.toLowerCase()
		const isDeepInfra =
			hostname === "deepinfra.com" ||
			hostname === "deepinfra.ai" ||
			hostname.endsWith(".deepinfra.com") ||
			hostname.endsWith(".deepinfra.ai")
		return isDeepInfra ? "deepinfra" : "standard"
	} catch {
		// URL constructor throws on strings that are not absolute URLs;
		// fall back to a conservative substring heuristic in that case.
		return /deepinfra\.(com|ai)/i.test(url) ? "deepinfra" : "standard"
	}
}

/**
* Creates embeddings for the given texts with batching and rate limiting
* @param texts Array of text strings to embed
Expand Down Expand Up @@ -204,6 +221,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
batchTexts: string[],
model: string,
): Promise<OpenAIEmbeddingResponse> {
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

const response = await fetch(url, {
method: "POST",
headers: {
Expand All @@ -216,7 +236,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
body: JSON.stringify({
input: batchTexts,
model: model,
encoding_format: "base64",
encoding_format: encodingFormat,
}),
})

Expand Down Expand Up @@ -259,6 +279,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
// Use cached value for performance
const isFullUrl = this.isFullUrl
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
// Check global rate limit before attempting request
Expand All @@ -272,19 +294,18 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model)
} else {
// Use OpenAI SDK for base URLs
// DeepInfra requires 'float' encoding, others use 'base64'
response = (await this.embeddingsClient.embeddings.create({
input: batchTexts,
model: model,
// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
encoding_format: "base64",
encoding_format: encodingFormat as any,
})) as OpenAIEmbeddingResponse
}

// Convert base64 embeddings to float32 arrays
// Process embeddings based on response format
const processedEmbeddings = response.data.map((item: EmbeddingItem) => {
if (typeof item.embedding === "string") {
// Base64 encoded response (standard OpenAI-compatible)
const buffer = Buffer.from(item.embedding, "base64")

// Create Float32Array view over the buffer
Expand All @@ -294,7 +315,26 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
...item,
embedding: Array.from(float32Array),
}
} else if (Array.isArray(item.embedding)) {
// Float array response (DeepInfra)
// Ensure all values are valid numbers
const cleanedEmbedding = item.embedding.map((v: any) => {
const num = typeof v === "number" ? v : Number(v)
if (!isFinite(num)) {
console.error(
`[OpenAICompatibleEmbedder] WARNING: Invalid embedding value detected: ${v}`,
)
return 0 // Replace invalid values with 0
}
return num
})
return {
...item,
embedding: cleanedEmbedding,
}
}
// Fallback for unexpected formats
console.error(`[OpenAICompatibleEmbedder] Unexpected embedding format: ${typeof item.embedding}`)
return item
})

Expand Down Expand Up @@ -366,6 +406,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
// Test with a minimal embedding request
const testTexts = ["test"]
const modelToUse = this.defaultModelId
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

let response: OpenAIEmbeddingResponse

Expand All @@ -377,7 +419,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
response = (await this.embeddingsClient.embeddings.create({
input: testTexts,
model: modelToUse,
encoding_format: "base64",
encoding_format: encodingFormat as any,
})) as OpenAIEmbeddingResponse
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ describe("QdrantVectorStore", () => {
const mockWorkspacePath = "/test/workspace"
const mockQdrantUrl = "http://mock-qdrant:6333"
const mockApiKey = "test-api-key"
const mockVectorSize = 1536
const mockVectorSize = 3
const mockHashedPath = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" // Needs to be long enough
const expectedCollectionName = `ws-${mockHashedPath.substring(0, 16)}`

Expand Down
Loading