Skip to content

Commit 5f27712

Browse files
committed
fix: implement model-specific rate limiting for gemini-embedding-001
- Add Gemini-specific constants for gemini-embedding-001:
  - Reduced batch token limit (20,000 vs 100,000)
  - Longer retry delays (2000ms vs 500ms)
  - Smaller batch size limit (10 items)
- Update OpenAICompatibleEmbedder to accept configurable rate limiting parameters
- Add inter-batch delays for models with stricter rate limits
- Update GeminiEmbedder to use model-specific configuration
- Fix test expectations to match new constructor signature

Fixes #5713: gemini-embedding-001 quota limit issues during indexing
1 parent 88c4261 commit 5f27712

File tree

4 files changed

+55
-4
lines changed

4 files changed

+55
-4
lines changed

src/services/code-index/constants/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,6 @@ export const BATCH_PROCESSING_CONCURRENCY = 10
2828

2929
/**Gemini Embedder */
3030
export const GEMINI_MAX_ITEM_TOKENS = 2048
31+
export const GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS = 20000 // Reduced batch size for gemini-embedding-001
32+
export const GEMINI_EMBEDDING_001_RETRY_DELAY_MS = 2000 // Longer delay for gemini-embedding-001
33+
export const GEMINI_EMBEDDING_001_MAX_BATCH_SIZE = 10 // Smaller batch size for gemini-embedding-001

src/services/code-index/embedders/__tests__/gemini.spec.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ describe("GeminiEmbedder", () => {
3838
apiKey,
3939
"gemini-embedding-001",
4040
2048,
41+
20000, // GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS
42+
2000, // GEMINI_EMBEDDING_001_RETRY_DELAY_MS
43+
10, // GEMINI_EMBEDDING_001_MAX_BATCH_SIZE
4144
)
4245
})
4346

@@ -55,6 +58,9 @@ describe("GeminiEmbedder", () => {
5558
apiKey,
5659
"text-embedding-004",
5760
2048,
61+
100000, // MAX_BATCH_TOKENS (default for text-embedding-004)
62+
500, // INITIAL_RETRY_DELAY_MS (default for text-embedding-004)
63+
undefined, // maxBatchSize (undefined for text-embedding-004)
5864
)
5965
})
6066

src/services/code-index/embedders/gemini.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
import { OpenAICompatibleEmbedder } from "./openai-compatible"
22
import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces/embedder"
3-
import { GEMINI_MAX_ITEM_TOKENS } from "../constants"
3+
import {
4+
GEMINI_MAX_ITEM_TOKENS,
5+
GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS,
6+
GEMINI_EMBEDDING_001_RETRY_DELAY_MS,
7+
GEMINI_EMBEDDING_001_MAX_BATCH_SIZE,
8+
MAX_BATCH_TOKENS,
9+
INITIAL_RETRY_DELAY_MS,
10+
} from "../constants"
411
import { t } from "../../../i18n"
512
import { TelemetryEventName } from "@roo-code/types"
613
import { TelemetryService } from "@roo-code/telemetry"
@@ -32,12 +39,21 @@ export class GeminiEmbedder implements IEmbedder {
3239
// Use provided model or default
3340
this.modelId = modelId || GeminiEmbedder.DEFAULT_MODEL
3441

42+
// Get model-specific configuration for gemini-embedding-001
43+
const isGeminiEmbedding001 = this.modelId === "gemini-embedding-001"
44+
const maxBatchTokens = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS : MAX_BATCH_TOKENS
45+
const retryDelayMs = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_RETRY_DELAY_MS : INITIAL_RETRY_DELAY_MS
46+
const maxBatchSize = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_MAX_BATCH_SIZE : undefined
47+
3548
// Create an OpenAI Compatible embedder with Gemini's configuration
3649
this.openAICompatibleEmbedder = new OpenAICompatibleEmbedder(
3750
GeminiEmbedder.GEMINI_BASE_URL,
3851
apiKey,
3952
this.modelId,
4053
GEMINI_MAX_ITEM_TOKENS,
54+
maxBatchTokens,
55+
retryDelayMs,
56+
maxBatchSize,
4157
)
4258
}
4359

src/services/code-index/embedders/openai-compatible.ts

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,29 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
3737
private readonly apiKey: string
3838
private readonly isFullUrl: boolean
3939
private readonly maxItemTokens: number
40+
private readonly maxBatchTokens: number
41+
private readonly retryDelayMs: number
42+
private readonly maxBatchSize?: number
4043

4144
/**
4245
* Creates a new OpenAI Compatible embedder
4346
* @param baseUrl The base URL for the OpenAI-compatible API endpoint
4447
* @param apiKey The API key for authentication
4548
* @param modelId Optional model identifier (defaults to "text-embedding-3-small")
4649
* @param maxItemTokens Optional maximum tokens per item (defaults to MAX_ITEM_TOKENS)
50+
* @param maxBatchTokens Optional maximum tokens per batch (defaults to MAX_BATCH_TOKENS)
51+
* @param retryDelayMs Optional initial retry delay in milliseconds (defaults to INITIAL_DELAY_MS)
52+
* @param maxBatchSize Optional maximum number of items per batch
4753
*/
48-
constructor(baseUrl: string, apiKey: string, modelId?: string, maxItemTokens?: number) {
54+
constructor(
55+
baseUrl: string,
56+
apiKey: string,
57+
modelId?: string,
58+
maxItemTokens?: number,
59+
maxBatchTokens?: number,
60+
retryDelayMs?: number,
61+
maxBatchSize?: number,
62+
) {
4963
if (!baseUrl) {
5064
throw new Error(t("embeddings:validation.baseUrlRequired"))
5165
}
@@ -63,6 +77,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
6377
// Cache the URL type check for performance
6478
this.isFullUrl = this.isFullEndpointUrl(baseUrl)
6579
this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
80+
this.maxBatchTokens = maxBatchTokens || MAX_BATCH_TOKENS
81+
this.retryDelayMs = retryDelayMs || INITIAL_DELAY_MS
82+
this.maxBatchSize = maxBatchSize
6683
}
6784

6885
/**
@@ -124,7 +141,10 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
124141
continue
125142
}
126143

127-
if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) {
144+
if (
145+
currentBatchTokens + itemTokens <= this.maxBatchTokens &&
146+
(!this.maxBatchSize || currentBatch.length < this.maxBatchSize)
147+
) {
128148
currentBatch.push(text)
129149
currentBatchTokens += itemTokens
130150
processedIndices.push(i)
@@ -143,6 +163,12 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
143163
allEmbeddings.push(...batchResult.embeddings)
144164
usage.promptTokens += batchResult.usage.promptTokens
145165
usage.totalTokens += batchResult.usage.totalTokens
166+
167+
// Add delay between batches if there are more batches to process
168+
// This helps with rate limiting, especially for gemini-embedding-001
169+
if (remainingTexts.length > 0 && this.retryDelayMs > INITIAL_DELAY_MS) {
170+
await new Promise((resolve) => setTimeout(resolve, this.retryDelayMs / 4))
171+
}
146172
}
147173
}
148174

@@ -299,7 +325,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
299325
// Check if it's a rate limit error
300326
const httpError = error as HttpError
301327
if (httpError?.status === 429 && hasMoreAttempts) {
302-
const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
328+
const delayMs = this.retryDelayMs * Math.pow(2, attempts)
303329
console.warn(
304330
t("embeddings:rateLimitRetry", {
305331
delayMs,

0 commit comments

Comments (0)