diff --git a/src/services/code-index/constants/index.ts b/src/services/code-index/constants/index.ts
index c2567f5635b..2af711dcecc 100644
--- a/src/services/code-index/constants/index.ts
+++ b/src/services/code-index/constants/index.ts
@@ -28,3 +28,6 @@ export const BATCH_PROCESSING_CONCURRENCY = 10
 
 /**Gemini Embedder */
 export const GEMINI_MAX_ITEM_TOKENS = 2048
+export const GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS = 20000 // Reduced batch size for gemini-embedding-001
+export const GEMINI_EMBEDDING_001_RETRY_DELAY_MS = 2000 // Longer delay for gemini-embedding-001
+export const GEMINI_EMBEDDING_001_MAX_BATCH_SIZE = 10 // Smaller batch size for gemini-embedding-001
diff --git a/src/services/code-index/embedders/__tests__/gemini.spec.ts b/src/services/code-index/embedders/__tests__/gemini.spec.ts
index d41a4dc1e93..a2a416da07c 100644
--- a/src/services/code-index/embedders/__tests__/gemini.spec.ts
+++ b/src/services/code-index/embedders/__tests__/gemini.spec.ts
@@ -38,6 +38,9 @@ describe("GeminiEmbedder", () => {
 				apiKey,
 				"gemini-embedding-001",
 				2048,
+				20000, // GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS
+				2000, // GEMINI_EMBEDDING_001_RETRY_DELAY_MS
+				10, // GEMINI_EMBEDDING_001_MAX_BATCH_SIZE
 			)
 		})
 
@@ -55,6 +58,9 @@ describe("GeminiEmbedder", () => {
 				apiKey,
 				"text-embedding-004",
 				2048,
+				100000, // MAX_BATCH_TOKENS (default for text-embedding-004)
+				500, // INITIAL_RETRY_DELAY_MS (default for text-embedding-004)
+				undefined, // maxBatchSize (undefined for text-embedding-004)
 			)
 		})
 
diff --git a/src/services/code-index/embedders/gemini.ts b/src/services/code-index/embedders/gemini.ts
index 7e795875c9d..316948f904a 100644
--- a/src/services/code-index/embedders/gemini.ts
+++ b/src/services/code-index/embedders/gemini.ts
@@ -1,6 +1,13 @@
 import { OpenAICompatibleEmbedder } from "./openai-compatible"
 import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces/embedder"
-import { GEMINI_MAX_ITEM_TOKENS } from "../constants"
+import {
+	GEMINI_MAX_ITEM_TOKENS,
+	GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS,
+	GEMINI_EMBEDDING_001_RETRY_DELAY_MS,
+	GEMINI_EMBEDDING_001_MAX_BATCH_SIZE,
+	MAX_BATCH_TOKENS,
+	INITIAL_RETRY_DELAY_MS,
+} from "../constants"
 import { t } from "../../../i18n"
 import { TelemetryEventName } from "@roo-code/types"
 import { TelemetryService } from "@roo-code/telemetry"
@@ -32,12 +39,21 @@ export class GeminiEmbedder implements IEmbedder {
 		// Use provided model or default
 		this.modelId = modelId || GeminiEmbedder.DEFAULT_MODEL
 
+		// Get model-specific configuration for gemini-embedding-001
+		const isGeminiEmbedding001 = this.modelId === "gemini-embedding-001"
+		const maxBatchTokens = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS : MAX_BATCH_TOKENS
+		const retryDelayMs = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_RETRY_DELAY_MS : INITIAL_RETRY_DELAY_MS
+		const maxBatchSize = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_MAX_BATCH_SIZE : undefined
+
 		// Create an OpenAI Compatible embedder with Gemini's configuration
 		this.openAICompatibleEmbedder = new OpenAICompatibleEmbedder(
 			GeminiEmbedder.GEMINI_BASE_URL,
 			apiKey,
 			this.modelId,
 			GEMINI_MAX_ITEM_TOKENS,
+			maxBatchTokens,
+			retryDelayMs,
+			maxBatchSize,
 		)
 	}
 
diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts
index d882e783139..6f5f785666f 100644
--- a/src/services/code-index/embedders/openai-compatible.ts
+++ b/src/services/code-index/embedders/openai-compatible.ts
@@ -37,6 +37,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 	private readonly apiKey: string
 	private readonly isFullUrl: boolean
 	private readonly maxItemTokens: number
+	private readonly maxBatchTokens: number
+	private readonly retryDelayMs: number
+	private readonly maxBatchSize?: number
 
 	/**
 	 * Creates a new OpenAI Compatible embedder
@@ -44,8 +47,19 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 	 * @param apiKey The API key for authentication
 	 * @param modelId Optional model identifier (defaults to "text-embedding-3-small")
 	 * @param maxItemTokens Optional maximum tokens per item (defaults to MAX_ITEM_TOKENS)
+	 * @param maxBatchTokens Optional maximum tokens per batch (defaults to MAX_BATCH_TOKENS)
+	 * @param retryDelayMs Optional initial retry delay in milliseconds (defaults to INITIAL_DELAY_MS)
+	 * @param maxBatchSize Optional maximum number of items per batch
 	 */
-	constructor(baseUrl: string, apiKey: string, modelId?: string, maxItemTokens?: number) {
+	constructor(
+		baseUrl: string,
+		apiKey: string,
+		modelId?: string,
+		maxItemTokens?: number,
+		maxBatchTokens?: number,
+		retryDelayMs?: number,
+		maxBatchSize?: number,
+	) {
 		if (!baseUrl) {
 			throw new Error(t("embeddings:validation.baseUrlRequired"))
 		}
@@ -63,6 +77,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 		// Cache the URL type check for performance
 		this.isFullUrl = this.isFullEndpointUrl(baseUrl)
 		this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
+		this.maxBatchTokens = maxBatchTokens || MAX_BATCH_TOKENS
+		this.retryDelayMs = retryDelayMs || INITIAL_DELAY_MS
+		this.maxBatchSize = maxBatchSize
 	}
 
 	/**
@@ -124,7 +141,10 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 				continue
 			}
 
-			if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) {
+			if (
+				currentBatchTokens + itemTokens <= this.maxBatchTokens &&
+				(!this.maxBatchSize || currentBatch.length < this.maxBatchSize)
+			) {
 				currentBatch.push(text)
 				currentBatchTokens += itemTokens
 				processedIndices.push(i)
@@ -143,6 +163,12 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 			allEmbeddings.push(...batchResult.embeddings)
 			usage.promptTokens += batchResult.usage.promptTokens
 			usage.totalTokens += batchResult.usage.totalTokens
+
+			// Add delay between batches if there are more batches to process
+			// This helps with rate limiting, especially for gemini-embedding-001
+			if (remainingTexts.length > 0 && this.retryDelayMs > INITIAL_DELAY_MS) {
+				await new Promise((resolve) => setTimeout(resolve, this.retryDelayMs / 4))
+			}
 		}
 	}
 
@@ -299,7 +325,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 				// Check if it's a rate limit error
 				const httpError = error as HttpError
 				if (httpError?.status === 429 && hasMoreAttempts) {
-					const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
+					const delayMs = this.retryDelayMs * Math.pow(2, attempts)
 					console.warn(
 						t("embeddings:rateLimitRetry", {
 							delayMs,