7 changes: 6 additions & 1 deletion src/services/code-index/constants/index.ts
@@ -24,7 +24,12 @@ export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate

/**OpenAI Embedder */
export const MAX_BATCH_TOKENS = 100000
export const MAX_ITEM_TOKENS = 8191
// NOTE: Conservative limit of 511 tokens (one below the common 512-token cap)
// to ensure compatibility with most embedding models.
// Many models (e.g. e5-large, bge-large-en-v1.5) support at most 512 tokens.
// TODO: Consider implementing model-specific token limits in embeddingModels.ts
// (a sketch follows this file's diff); some models support higher limits,
// e.g. Qwen3-Embedding (8192) and embeddinggemma (2048).
export const MAX_ITEM_TOKENS = 511
export const BATCH_PROCESSING_CONCURRENCY = 10

/**Gemini Embedder */
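The TODO above suggests model-specific token limits. A minimal sketch of that idea, assuming a lookup table in embeddingModels.ts; the table and the helper getMaxItemTokens() are illustrative and not part of this PR, and the per-model limits are the ones named in the comment:

const MODEL_MAX_ITEM_TOKENS: Record<string, number> = {
	"Qwen/Qwen3-Embedding-0.6B": 8192,
	"embeddinggemma": 2048,
	"e5-large": 512,
	"bge-large-en-v1.5": 512,
}

// Falls back to the conservative 511-token default for unknown models.
export function getMaxItemTokens(modelId?: string): number {
	if (!modelId) return MAX_ITEM_TOKENS
	return MODEL_MAX_ITEM_TOKENS[modelId] ?? MAX_ITEM_TOKENS
}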
@@ -1082,4 +1082,186 @@ describe("OpenAICompatibleEmbedder", () => {
expect(result.error).toBe("embeddings:validation.configurationError")
})
})

describe("DeepInfra provider detection and handling", () => {
it("should detect DeepInfra URLs with deepinfra.com domain", () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("deepinfra")
})

it("should detect DeepInfra URLs with deepinfra.ai domain", () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.ai/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("deepinfra")
})

it("should detect standard providers for non-DeepInfra URLs", () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Check the provider type is correctly detected
expect(embedder["providerType"]).toBe("standard")
})

it("should send float encoding format for DeepInfra", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Mock response with float array
const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

await embedder.createEmbeddings(["test text"])

// Verify that 'float' encoding format was used
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test text"],
model: "Qwen/Qwen3-Embedding-0.6B",
encoding_format: "float",
})
})

it("should send base64 encoding format for standard providers", async () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Mock response with base64 string
const testEmbedding = new Float32Array([0.1, 0.2, 0.3])
const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
const mockResponse = {
data: [{ embedding: base64String }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

await embedder.createEmbeddings(["test text"])

// Verify that 'base64' encoding format was used
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test text"],
model: testModelId,
encoding_format: "base64",
})
})

it("should handle float array responses from DeepInfra", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

// Mock response with float array (DeepInfra format)
const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
usage: { prompt_tokens: 20, total_tokens: 25 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.createEmbeddings(["text1", "text2"])

// Verify the embeddings are correctly processed
expect(result.embeddings).toEqual([
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
])
expect(result.usage).toEqual({
promptTokens: 20,
totalTokens: 25,
})
})

it("should handle base64 responses from standard providers", async () => {
const embedder = new OpenAICompatibleEmbedder("https://api.openai.com/v1", testApiKey, testModelId)

// Create base64 encoded embeddings
const embedding1 = new Float32Array([0.1, 0.2, 0.3])
const embedding2 = new Float32Array([0.4, 0.5, 0.6])
const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
const base64String2 = Buffer.from(embedding2.buffer).toString("base64")

const mockResponse = {
data: [{ embedding: base64String1 }, { embedding: base64String2 }],
usage: { prompt_tokens: 20, total_tokens: 25 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.createEmbeddings(["text1", "text2"])

// Verify the embeddings are correctly decoded from base64
expect(result.embeddings[0][0]).toBeCloseTo(0.1, 5)
expect(result.embeddings[0][1]).toBeCloseTo(0.2, 5)
expect(result.embeddings[0][2]).toBeCloseTo(0.3, 5)
expect(result.embeddings[1][0]).toBeCloseTo(0.4, 5)
expect(result.embeddings[1][1]).toBeCloseTo(0.5, 5)
expect(result.embeddings[1][2]).toBeCloseTo(0.6, 5)
expect(result.usage).toEqual({
promptTokens: 20,
totalTokens: 25,
})
})

it("should validate DeepInfra configuration with float format", async () => {
const embedder = new OpenAICompatibleEmbedder(
"https://api.deepinfra.com/v1/openai",
testApiKey,
"Qwen/Qwen3-Embedding-0.6B",
)

const mockResponse = {
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 2, total_tokens: 2 },
}
mockEmbeddingsCreate.mockResolvedValue(mockResponse)

const result = await embedder.validateConfiguration()

expect(result.valid).toBe(true)
expect(result.error).toBeUndefined()
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
input: ["test"],
model: "Qwen/Qwen3-Embedding-0.6B",
encoding_format: "float",
})
})

it("should use float format for DeepInfra with full endpoint URLs", async () => {
const fullUrl = "https://api.deepinfra.com/v1/openai/embeddings"
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, "Qwen/Qwen3-Embedding-0.6B")

global.fetch = vitest.fn().mockResolvedValueOnce({
ok: true,
status: 200,
json: async () => ({
data: [{ embedding: [0.1, 0.2, 0.3] }],
usage: { prompt_tokens: 10, total_tokens: 15 },
}),
} as any)

await embedder.createEmbeddings(["test"])

// Verify the request body contains float encoding format
expect(global.fetch).toHaveBeenCalledWith(
fullUrl,
expect.objectContaining({
body: expect.stringContaining('"encoding_format":"float"'),
}),
)
})
})
})
Review comment:
The query prefix functionality added in this PR (lines 114-137) lacks test coverage. While DeepInfra provider detection and encoding format are tested, there are no tests verifying that getModelQueryPrefix() prefixes are actually being applied to queries. Consider adding tests that verify: (1) prefixes are correctly added for models that require them, (2) double-prefixing is prevented, and (3) texts that would exceed MAX_ITEM_TOKENS after prefixing are handled appropriately.
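A sketch of the first two suggested tests, reusing this spec's existing mocks. It assumes getModelQueryPrefix() returns "query: " for the model used here; the prefix value is illustrative, and case (3) would additionally need an input sized near the 511-token limit:

it("should apply the model's query prefix to inputs", async () => {
	const embedder = new OpenAICompatibleEmbedder(
		"https://api.deepinfra.com/v1/openai",
		testApiKey,
		"Qwen/Qwen3-Embedding-0.6B",
	)
	mockEmbeddingsCreate.mockResolvedValue({
		data: [{ embedding: [0.1, 0.2, 0.3] }],
		usage: { prompt_tokens: 10, total_tokens: 15 },
	})

	await embedder.createEmbeddings(["some text"])

	// Assumes the prefix is applied inside createEmbeddings
	expect(mockEmbeddingsCreate).toHaveBeenCalledWith(
		expect.objectContaining({ input: ["query: some text"] }),
	)
})

it("should not double-prefix texts that already start with the prefix", async () => {
	const embedder = new OpenAICompatibleEmbedder(
		"https://api.deepinfra.com/v1/openai",
		testApiKey,
		"Qwen/Qwen3-Embedding-0.6B",
	)
	mockEmbeddingsCreate.mockResolvedValue({
		data: [{ embedding: [0.1, 0.2, 0.3] }],
		usage: { prompt_tokens: 10, total_tokens: 15 },
	})

	await embedder.createEmbeddings(["query: some text"])

	expect(mockEmbeddingsCreate).toHaveBeenCalledWith(
		expect.objectContaining({ input: ["query: some text"] }),
	)
})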

56 changes: 49 additions & 7 deletions src/services/code-index/embedders/openai-compatible.ts
@@ -39,6 +39,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
private readonly apiKey: string
private readonly isFullUrl: boolean
private readonly maxItemTokens: number
private readonly providerType: "deepinfra" | "standard"

// Global rate limiting state shared across all instances
private static globalRateLimitState = {
@@ -82,9 +83,25 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
this.defaultModelId = modelId || getDefaultModelId("openai-compatible")
// Cache the URL type check for performance
this.isFullUrl = this.isFullEndpointUrl(baseUrl)
// Cache the provider type detection for performance
this.providerType = this.detectProviderType(baseUrl)
this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
}

/**
* Detects the provider type based on the URL pattern.
* DeepInfra requires 'float' encoding format while others use 'base64'.
* @param url The API URL to analyze
* @returns 'deepinfra' for DeepInfra endpoints, 'standard' for others
*/
private detectProviderType(url: string): "deepinfra" | "standard" {
// DeepInfra URLs contain 'deepinfra.com' or 'deepinfra.ai'
const deepInfraPatterns = [/deepinfra\.com/i, /deepinfra\.ai/i]

const isDeepInfra = deepInfraPatterns.some((pattern) => pattern.test(url))
return isDeepInfra ? "deepinfra" : "standard"
}

/**
* Creates embeddings for the given texts with batching and rate limiting
* @param texts Array of text strings to embed
@@ -204,6 +221,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
batchTexts: string[],
model: string,
): Promise<OpenAIEmbeddingResponse> {
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

const response = await fetch(url, {
method: "POST",
headers: {
@@ -216,7 +236,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
body: JSON.stringify({
input: batchTexts,
model: model,
encoding_format: "base64",
encoding_format: encodingFormat,
}),
})

@@ -259,6 +279,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
// Use cached value for performance
const isFullUrl = this.isFullUrl
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
// Check global rate limit before attempting request
@@ -272,19 +294,18 @@
response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model)
} else {
// Use OpenAI SDK for base URLs
// DeepInfra requires 'float' encoding, others use 'base64'
response = (await this.embeddingsClient.embeddings.create({
input: batchTexts,
model: model,
// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
encoding_format: "base64",
encoding_format: encodingFormat as any,
})) as OpenAIEmbeddingResponse
}

// Convert base64 embeddings to float32 arrays
// Process embeddings based on response format
const processedEmbeddings = response.data.map((item: EmbeddingItem) => {
if (typeof item.embedding === "string") {
// Base64 encoded response (standard OpenAI-compatible)
const buffer = Buffer.from(item.embedding, "base64")

// Create Float32Array view over the buffer
@@ -294,7 +315,26 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
...item,
embedding: Array.from(float32Array),
}
} else if (Array.isArray(item.embedding)) {
// Float array response (DeepInfra)
// Ensure all values are valid numbers
const cleanedEmbedding = item.embedding.map((v: any) => {
const num = typeof v === "number" ? v : Number(v)
if (!isFinite(num)) {
console.error(
`[OpenAICompatibleEmbedder] WARNING: Invalid embedding value detected: ${v}`,
)
return 0 // Replace invalid values with 0
}
return num
})
return {
...item,
embedding: cleanedEmbedding,
}
}
// Fallback for unexpected formats
console.error(`[OpenAICompatibleEmbedder] Unexpected embedding format: ${typeof item.embedding}`)
return item
})

@@ -366,6 +406,8 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
// Test with a minimal embedding request
const testTexts = ["test"]
const modelToUse = this.defaultModelId
// Use appropriate encoding format based on provider
const encodingFormat = this.providerType === "deepinfra" ? "float" : "base64"

let response: OpenAIEmbeddingResponse

@@ -377,7 +419,7 @@
response = (await this.embeddingsClient.embeddings.create({
input: testTexts,
model: modelToUse,
encoding_format: "base64",
encoding_format: encodingFormat as any,
})) as OpenAIEmbeddingResponse
}

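For reference, a self-contained sketch of the base64 decoding path this embedder relies on (function name illustrative). Embeddings requested with encoding_format "base64" arrive as a base64 string whose bytes are little-endian float32 values; viewing those bytes through a Float32Array recovers the full-dimension vector while bypassing the SDK parsing issue noted in the diff above.

function decodeBase64Embedding(encoded: string): number[] {
	const buffer = Buffer.from(encoded, "base64")
	// View the buffer's bytes as float32 values (4 bytes each); byteOffset
	// matters because a Node Buffer may be a view into a larger ArrayBuffer.
	const floats = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4)
	return Array.from(floats)
}

// Round trip: encode three floats, then decode them back.
const original = new Float32Array([0.1, 0.2, 0.3])
const encoded = Buffer.from(original.buffer).toString("base64")
console.log(decodeBase64Embedding(encoded)) // ≈ [0.1, 0.2, 0.3]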