diff --git a/src/services/code-index/__tests__/service-factory.spec.ts b/src/services/code-index/__tests__/service-factory.spec.ts
index 1d8f7ba478..6fb14c3c9e 100644
--- a/src/services/code-index/__tests__/service-factory.spec.ts
+++ b/src/services/code-index/__tests__/service-factory.spec.ts
@@ -17,6 +17,7 @@ vitest.mock("../vector-store/qdrant-client")
 vitest.mock("../../../shared/embeddingModels", () => ({
 	getDefaultModelId: vitest.fn(),
 	getModelDimension: vitest.fn(),
+	getModelMaxBatchSize: vitest.fn(),
 }))
 
 // Mock TelemetryService
@@ -35,9 +36,10 @@ const MockedGeminiEmbedder = GeminiEmbedder as MockedClass<typeof GeminiEmbedder>
 
 // Import the mocked functions
-import { getDefaultModelId, getModelDimension } from "../../../shared/embeddingModels"
+import { getDefaultModelId, getModelDimension, getModelMaxBatchSize } from "../../../shared/embeddingModels"
 const mockGetDefaultModelId = getDefaultModelId as MockedFunction<typeof getDefaultModelId>
 const mockGetModelDimension = getModelDimension as MockedFunction<typeof getModelDimension>
+const mockGetModelMaxBatchSize = getModelMaxBatchSize as MockedFunction<typeof getModelMaxBatchSize>
 
 describe("CodeIndexServiceFactory", () => {
 	let factory: CodeIndexServiceFactory
@@ -53,6 +55,9 @@ describe("CodeIndexServiceFactory", () => {
 
 		mockCacheManager = {}
 
+		// Default mock for getModelMaxBatchSize
+		mockGetModelMaxBatchSize.mockReturnValue(undefined)
+
 		factory = new CodeIndexServiceFactory(mockConfigManager, "/test/workspace", mockCacheManager)
 	})
 
@@ -194,6 +199,8 @@ describe("CodeIndexServiceFactory", () => {
 				"https://api.example.com/v1",
 				"test-api-key",
 				testModelId,
+				undefined,
+				undefined,
 			)
 		})
 
@@ -217,6 +224,8 @@
 				"https://api.example.com/v1",
 				"test-api-key",
 				undefined,
+				undefined,
+				undefined,
 			)
 		})
 
diff --git a/src/services/code-index/embedders/__tests__/openai-compatible-batch-limit.spec.ts b/src/services/code-index/embedders/__tests__/openai-compatible-batch-limit.spec.ts
new file mode 100644
index 0000000000..8dabe7f989
--- /dev/null
+++ b/src/services/code-index/embedders/__tests__/openai-compatible-batch-limit.spec.ts
@@ -0,0 +1,281 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"
+import { OpenAICompatibleEmbedder } from "../openai-compatible"
+import { OpenAI } from "openai"
+
+// Mock OpenAI
+vi.mock("openai")
+
+// Mock the embeddingModels module
+vi.mock("../../../../shared/embeddingModels", () => ({
+	getDefaultModelId: vi.fn().mockReturnValue("text-embedding-3-small"),
+	getModelQueryPrefix: vi.fn().mockReturnValue(undefined),
+	getModelMaxBatchSize: vi.fn(),
+}))
+
+// Mock the translation function
+vi.mock("../../../../i18n", () => ({
+	t: (key: string, params?: any) => {
+		const translations: Record<string, string> = {
+			"embeddings:textExceedsTokenLimit": `Text at index ${params?.index} exceeds token limit`,
+			"embeddings:failedMaxAttempts": `Failed after ${params?.attempts} attempts`,
+		}
+		return translations[key] || key
+	},
+}))
+
+// Import mocked functions
+import { getModelMaxBatchSize } from "../../../../shared/embeddingModels"
+const mockGetModelMaxBatchSize = getModelMaxBatchSize as any
+
+describe("OpenAICompatibleEmbedder - Batch Size Limits", () => {
+	let mockOpenAIInstance: any
+	let mockEmbeddingsCreate: any
+
+	const testBaseUrl = "https://api.example.com/v1"
+	const testApiKey = "test-api-key"
+
+	beforeEach(() => {
+		// Reset all mocks
+		vi.clearAllMocks()
+
+		// Setup OpenAI mock
+		mockEmbeddingsCreate = vi.fn()
+		mockOpenAIInstance = {
+			embeddings: {
+				create: mockEmbeddingsCreate,
+			},
+		}
+		;(OpenAI as any).mockImplementation(() => mockOpenAIInstance)
+	})
+
+	afterEach(() => {
+		vi.clearAllMocks()
+	})
+
+	describe("Model-specific batch size limits", () => {
+		it("should respect model-specific batch size limit from profile", async () => {
+			// Setup model with batch size limit of 10
+			mockGetModelMaxBatchSize.mockReturnValue(10)
+
+			const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, "qwen3-embedding")
+
+			// Create 15 texts - should be split into 2 batches (10 + 5)
+			const texts = Array.from({ length: 15 }, (_, i) => `Text ${i}`)
+
+			// Mock successful responses
+			mockEmbeddingsCreate
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 10 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i, i + 0.1, i + 0.2]).buffer).toString("base64"),
+					})),
+					usage: { prompt_tokens: 100, total_tokens: 150 },
+				})
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 5 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i + 10, i + 10.1, i + 10.2]).buffer).toString(
+							"base64",
+						),
+					})),
+					usage: { prompt_tokens: 50, total_tokens: 75 },
+				})
+
+			const result = await embedder.createEmbeddings(texts)
+
+			// Should have made 2 API calls
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
+
+			// First call should have 10 texts
+			expect(mockEmbeddingsCreate.mock.calls[0][0].input).toHaveLength(10)
+
+			// Second call should have 5 texts
+			expect(mockEmbeddingsCreate.mock.calls[1][0].input).toHaveLength(5)
+
+			// Result should contain all 15 embeddings
+			expect(result.embeddings).toHaveLength(15)
+		})
+
+		it("should use constructor-provided maxBatchSize over model profile", async () => {
+			// Model profile says 10, but constructor overrides to 5
+			mockGetModelMaxBatchSize.mockReturnValue(10)
+
+			const embedder = new OpenAICompatibleEmbedder(
+				testBaseUrl,
+				testApiKey,
+				"qwen3-embedding",
+				undefined, // maxItemTokens
+				5, // maxBatchSize override
+			)
+
+			// Create 12 texts - should be split into 3 batches (5 + 5 + 2)
+			const texts = Array.from({ length: 12 }, (_, i) => `Text ${i}`)
+
+			// Mock successful responses
+			mockEmbeddingsCreate
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 5 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i, i + 0.1, i + 0.2]).buffer).toString("base64"),
+					})),
+					usage: { prompt_tokens: 50, total_tokens: 75 },
+				})
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 5 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i + 5, i + 5.1, i + 5.2]).buffer).toString("base64"),
+					})),
+					usage: { prompt_tokens: 50, total_tokens: 75 },
+				})
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 2 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i + 10, i + 10.1, i + 10.2]).buffer).toString(
+							"base64",
+						),
+					})),
+					usage: { prompt_tokens: 20, total_tokens: 30 },
+				})
+
+			const result = await embedder.createEmbeddings(texts)
+
+			// Should have made 3 API calls
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(3)
+
+			// First two calls should have 5 texts each
+			expect(mockEmbeddingsCreate.mock.calls[0][0].input).toHaveLength(5)
+			expect(mockEmbeddingsCreate.mock.calls[1][0].input).toHaveLength(5)
+
+			// Third call should have 2 texts
+			expect(mockEmbeddingsCreate.mock.calls[2][0].input).toHaveLength(2)
+
+			// Result should contain all 12 embeddings
+			expect(result.embeddings).toHaveLength(12)
+		})
+
+		it("should handle no batch size limit (undefined)", async () => {
+			// No batch size limit from model profile
+			mockGetModelMaxBatchSize.mockReturnValue(undefined)
+
+			const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, "text-embedding-3-small")
+
+			// Create 100 small texts - should be batched by token limit only
+			const texts = Array.from({ length: 100 }, (_, i) => `T${i}`) // Very short texts
+
+			// Mock successful response for large batch
+			mockEmbeddingsCreate.mockResolvedValue({
+				data: Array.from({ length: 100 }, (_, i) => ({
+					embedding: Buffer.from(new Float32Array([i, i + 0.1, i + 0.2]).buffer).toString("base64"),
+				})),
+				usage: { prompt_tokens: 200, total_tokens: 300 },
+			})
+
+			const result = await embedder.createEmbeddings(texts)
+
+			// Should make only 1 API call since texts are small and no batch limit
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+			expect(mockEmbeddingsCreate.mock.calls[0][0].input).toHaveLength(100)
+			expect(result.embeddings).toHaveLength(100)
+		})
+
+		it("should respect batch size limit with mixed text sizes", async () => {
+			// Set batch size limit to 10
+			mockGetModelMaxBatchSize.mockReturnValue(10)
+
+			const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, "qwen3-embedding")
+
+			// Create 20 texts - should be split into 2 batches due to batch size limit
+			const texts = Array.from({ length: 20 }, (_, i) => `Text content ${i}`)
+
+			// Mock responses for 2 batches (10 + 10)
+			mockEmbeddingsCreate
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 10 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i, i + 0.1, i + 0.2]).buffer).toString("base64"),
+					})),
+					usage: { prompt_tokens: 100, total_tokens: 150 },
+				})
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 10 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i + 10, i + 10.1, i + 10.2]).buffer).toString(
+							"base64",
+						),
+					})),
+					usage: { prompt_tokens: 100, total_tokens: 150 },
+				})
+
+			const result = await embedder.createEmbeddings(texts)
+
+			// Should have made 2 API calls
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
+
+			// Each call should have 10 texts (batch size limit)
+			expect(mockEmbeddingsCreate.mock.calls[0][0].input).toHaveLength(10)
+			expect(mockEmbeddingsCreate.mock.calls[1][0].input).toHaveLength(10)
+
+			// Result should contain all 20 embeddings
+			expect(result.embeddings).toHaveLength(20)
+		})
+	})
+
+	describe("Aliyun Bailian specific models", () => {
+		it("should handle qwen3-embedding model with 10-item batch limit", async () => {
+			mockGetModelMaxBatchSize.mockReturnValue(10)
+
+			const embedder = new OpenAICompatibleEmbedder(
+				"https://dashscope.aliyuncs.com/compatible-mode/v1",
+				testApiKey,
+				"qwen3-embedding",
+			)
+
+			const texts = Array.from({ length: 25 }, (_, i) => `Text ${i}`)
+
+			// Mock responses for 3 batches (10 + 10 + 5)
+			mockEmbeddingsCreate
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 10 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i]).buffer).toString("base64"),
+					})),
+					usage: { prompt_tokens: 100, total_tokens: 150 },
+				})
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 10 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i + 10]).buffer).toString("base64"),
+					})),
+					usage: { prompt_tokens: 100, total_tokens: 150 },
+				})
+				.mockResolvedValueOnce({
+					data: Array.from({ length: 5 }, (_, i) => ({
+						embedding: Buffer.from(new Float32Array([i + 20]).buffer).toString("base64"),
+					})),
+					usage: { prompt_tokens: 50, total_tokens: 75 },
+				})
+
+			const result = await embedder.createEmbeddings(texts)
+
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(3)
+			expect(result.embeddings).toHaveLength(25)
+		})
+
+		it("should handle text-embedding-v4 model with 10-item batch limit", async () => {
+			mockGetModelMaxBatchSize.mockReturnValue(10)
+
+			const embedder = new OpenAICompatibleEmbedder(
+				"https://dashscope.aliyuncs.com/compatible-mode/v1",
+				testApiKey,
+				"text-embedding-v4",
+			)
+
+			const texts = Array.from({ length: 10 }, (_, i) => `Text ${i}`)
+
+			mockEmbeddingsCreate.mockResolvedValueOnce({
+				data: Array.from({ length: 10 }, (_, i) => ({
+					embedding: Buffer.from(new Float32Array([i]).buffer).toString("base64"),
+				})),
+				usage: { prompt_tokens: 100, total_tokens: 150 },
+			})
+
+			const result = await embedder.createEmbeddings(texts)
+
+			// Should make exactly 1 call for 10 items (at the limit)
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+			expect(mockEmbeddingsCreate.mock.calls[0][0].input).toHaveLength(10)
+			expect(result.embeddings).toHaveLength(10)
+		})
+	})
+})
diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts
index 06c4ba5282..77ebb60086 100644
--- a/src/services/code-index/embedders/openai-compatible.ts
+++ b/src/services/code-index/embedders/openai-compatible.ts
@@ -6,7 +6,7 @@ import {
 	MAX_BATCH_RETRIES as MAX_RETRIES,
 	INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS,
 } from "../constants"
-import { getDefaultModelId, getModelQueryPrefix } from "../../../shared/embeddingModels"
+import { getDefaultModelId, getModelQueryPrefix, getModelMaxBatchSize } from "../../../shared/embeddingModels"
 import { t } from "../../../i18n"
 import { withValidationErrorHandling, HttpError, formatEmbeddingError } from "../shared/validation-helpers"
 import { TelemetryEventName } from "@roo-code/types"
@@ -38,6 +38,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 	private readonly apiKey: string
 	private readonly isFullUrl: boolean
 	private readonly maxItemTokens: number
+	private readonly maxBatchSize: number | undefined
 
 	// Global rate limiting state shared across all instances
 	private static globalRateLimitState = {
@@ -55,8 +56,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 	 * @param apiKey The API key for authentication
 	 * @param modelId Optional model identifier (defaults to "text-embedding-3-small")
 	 * @param maxItemTokens Optional maximum tokens per item (defaults to MAX_ITEM_TOKENS)
+	 * @param maxBatchSize Optional maximum batch size (overrides model-specific limits)
 	 */
-	constructor(baseUrl: string, apiKey: string, modelId?: string, maxItemTokens?: number) {
+	constructor(baseUrl: string, apiKey: string, modelId?: string, maxItemTokens?: number, maxBatchSize?: number) {
 		if (!baseUrl) {
 			throw new Error(t("embeddings:validation.baseUrlRequired"))
 		}
@@ -74,6 +76,9 @@
 		// Cache the URL type check for performance
 		this.isFullUrl = this.isFullEndpointUrl(baseUrl)
 		this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
+		// Use provided maxBatchSize, or get from model profile, or undefined (no limit)
+		this.maxBatchSize =
+			maxBatchSize !== undefined ? maxBatchSize : getModelMaxBatchSize("openai-compatible", this.defaultModelId)
 	}
 
 	/**
@@ -135,7 +140,11 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 				continue
 			}
 
-			if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) {
+			// Check both token limit and batch size limit
+			const withinTokenLimit = currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS
+			const withinBatchSizeLimit = this.maxBatchSize === undefined || currentBatch.length < this.maxBatchSize
+
+			if (withinTokenLimit && withinBatchSizeLimit) {
 				currentBatch.push(text)
 				currentBatchTokens += itemTokens
 				processedIndices.push(i)
diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts
index 6d69e1f0b6..efc79e4fcb 100644
--- a/src/services/code-index/service-factory.ts
+++ b/src/services/code-index/service-factory.ts
@@ -5,7 +5,12 @@ import { OpenAICompatibleEmbedder } from "./embedders/openai-compatible"
 import { GeminiEmbedder } from "./embedders/gemini"
 import { MistralEmbedder } from "./embedders/mistral"
 import { VercelAiGatewayEmbedder } from "./embedders/vercel-ai-gateway"
-import { EmbedderProvider, getDefaultModelId, getModelDimension } from "../../shared/embeddingModels"
+import {
+	EmbedderProvider,
+	getDefaultModelId,
+	getModelDimension,
+	getModelMaxBatchSize,
+} from "../../shared/embeddingModels"
 import { QdrantVectorStore } from "./vector-store/qdrant-client"
 import { codeParser, DirectoryScanner, FileWatcher } from "./processors"
 import { ICodeParser, IEmbedder, IFileWatcher, IVectorStore } from "./interfaces"
@@ -59,10 +64,14 @@
 			if (!config.openAiCompatibleOptions?.baseUrl || !config.openAiCompatibleOptions?.apiKey) {
 				throw new Error(t("embeddings:serviceFactory.openAiCompatibleConfigMissing"))
 			}
+			// Get model-specific batch size limit if available
+			const maxBatchSize = config.modelId ? getModelMaxBatchSize("openai-compatible", config.modelId) : undefined
 			return new OpenAICompatibleEmbedder(
 				config.openAiCompatibleOptions.baseUrl,
 				config.openAiCompatibleOptions.apiKey,
 				config.modelId,
+				undefined, // maxItemTokens - use default
+				maxBatchSize,
 			)
 		} else if (provider === "gemini") {
 			if (!config.geminiOptions?.apiKey) {
@@ -168,6 +177,16 @@
 			// In test environment, vscode.workspace might not be available
 			batchSize = BATCH_SEGMENT_THRESHOLD
 		}
+
+		// Check if the embedder has a model-specific batch size limit
+		const config = this.configManager.getConfig()
+		if (config.embedderProvider === "openai-compatible" && config.modelId) {
+			const modelMaxBatchSize = getModelMaxBatchSize("openai-compatible", config.modelId)
+			if (modelMaxBatchSize && modelMaxBatchSize < batchSize) {
+				batchSize = modelMaxBatchSize
+			}
+		}
+
 		return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager, ignoreInstance, batchSize)
 	}
 
@@ -192,6 +211,16 @@
 			// In test environment, vscode.workspace might not be available
 			batchSize = BATCH_SEGMENT_THRESHOLD
 		}
+
+		// Check if the embedder has a model-specific batch size limit
+		const config = this.configManager.getConfig()
+		if (config.embedderProvider === "openai-compatible" && config.modelId) {
+			const modelMaxBatchSize = getModelMaxBatchSize("openai-compatible", config.modelId)
+			if (modelMaxBatchSize && modelMaxBatchSize < batchSize) {
+				batchSize = modelMaxBatchSize
+			}
+		}
+
 		return new FileWatcher(
 			this.workspacePath,
 			context,
diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts
index 80c51a6b45..4f300ac769 100644
--- a/src/shared/embeddingModels.ts
+++ b/src/shared/embeddingModels.ts
@@ -8,6 +8,7 @@ export interface EmbeddingModelProfile {
 	dimension: number
 	scoreThreshold?: number // Model-specific minimum score threshold for semantic search
 	queryPrefix?: string // Optional prefix required by the model for queries
+	maxBatchSize?: number // Maximum number of items that can be sent in a single batch
 	// Add other model-specific properties if needed, e.g., context window size
 }
 
@@ -45,6 +46,9 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = {
 			scoreThreshold: 0.15,
 			queryPrefix: "Represent this query for searching relevant code: ",
 		},
+		// Aliyun Bailian models with batch size limits
+		"qwen3-embedding": { dimension: 1536, scoreThreshold: 0.4, maxBatchSize: 10 },
+		"text-embedding-v4": { dimension: 1536, scoreThreshold: 0.4, maxBatchSize: 10 },
 	},
 	gemini: {
 		"text-embedding-004": { dimension: 768 },
@@ -127,6 +131,22 @@ export function getModelQueryPrefix(provider: EmbedderProvider, modelId: string)
 	return modelProfile?.queryPrefix
 }
 
+/**
+ * Retrieves the maximum batch size for a given provider and model ID.
+ * @param provider The embedder provider (e.g., "openai-compatible").
+ * @param modelId The specific model ID (e.g., "qwen3-embedding").
+ * @returns The maximum batch size or undefined if not specified.
+ */
+export function getModelMaxBatchSize(provider: EmbedderProvider, modelId: string): number | undefined {
+	const providerProfiles = EMBEDDING_MODEL_PROFILES[provider]
+	if (!providerProfiles) {
+		return undefined
+	}
+
+	const modelProfile = providerProfiles[modelId]
+	return modelProfile?.maxBatchSize
+}
+
 /**
  * Gets the default *specific* embedding model ID based on the provider.
  * Does not include the provider prefix.
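
For reference, a minimal usage sketch of the behavior this patch adds (not part of the diff; the import paths and the API-key environment variable below are illustrative assumptions):

	// Illustrative only: mirrors the profile lookup and constructor override introduced above.
	import { getModelMaxBatchSize } from "./src/shared/embeddingModels" // hypothetical import path
	import { OpenAICompatibleEmbedder } from "./src/services/code-index/embedders/openai-compatible" // hypothetical import path

	// Bailian models declare maxBatchSize: 10 in EMBEDDING_MODEL_PROFILES; models without a profile entry yield undefined.
	const qwenLimit = getModelMaxBatchSize("openai-compatible", "qwen3-embedding") // 10
	const defaultLimit = getModelMaxBatchSize("openai-compatible", "text-embedding-3-small") // undefined

	// The constructor argument takes precedence over the profile value; omit it to fall back to the profile.
	const embedder = new OpenAICompatibleEmbedder(
		"https://dashscope.aliyuncs.com/compatible-mode/v1",
		process.env.DASHSCOPE_API_KEY ?? "", // assumed env var, for illustration
		"qwen3-embedding",
		undefined, // maxItemTokens - use default
		5, // maxBatchSize override; without it the profile limit of 10 applies
	)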