Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/services/code-index/constants/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ export const BATCH_PROCESSING_CONCURRENCY = 10

/**
 * Gemini embedder constants.
 *
 * The `GEMINI_EMBEDDING_001_*` values override the generic embedder defaults
 * when the `gemini-embedding-001` model is selected, since that model has
 * stricter rate limits than `text-embedding-004`.
 */
export const GEMINI_MAX_ITEM_TOKENS = 2048
export const GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS = 20000 // Lower per-batch token budget for gemini-embedding-001
export const GEMINI_EMBEDDING_001_RETRY_DELAY_MS = 2000 // Longer initial retry delay for gemini-embedding-001
export const GEMINI_EMBEDDING_001_MAX_BATCH_SIZE = 10 // Fewer items per batch for gemini-embedding-001
6 changes: 6 additions & 0 deletions src/services/code-index/embedders/__tests__/gemini.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ describe("GeminiEmbedder", () => {
apiKey,
"gemini-embedding-001",
2048,
20000, // GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS
2000, // GEMINI_EMBEDDING_001_RETRY_DELAY_MS
10, // GEMINI_EMBEDDING_001_MAX_BATCH_SIZE
)
})

Expand All @@ -55,6 +58,9 @@ describe("GeminiEmbedder", () => {
apiKey,
"text-embedding-004",
2048,
100000, // MAX_BATCH_TOKENS (default for text-embedding-004)
500, // INITIAL_RETRY_DELAY_MS (default for text-embedding-004)
undefined, // maxBatchSize (undefined for text-embedding-004)
)
})

Expand Down
18 changes: 17 additions & 1 deletion src/services/code-index/embedders/gemini.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import { OpenAICompatibleEmbedder } from "./openai-compatible"
import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces/embedder"
import { GEMINI_MAX_ITEM_TOKENS } from "../constants"
import {
GEMINI_MAX_ITEM_TOKENS,
GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS,
GEMINI_EMBEDDING_001_RETRY_DELAY_MS,
GEMINI_EMBEDDING_001_MAX_BATCH_SIZE,
MAX_BATCH_TOKENS,
INITIAL_RETRY_DELAY_MS,
} from "../constants"
import { t } from "../../../i18n"
import { TelemetryEventName } from "@roo-code/types"
import { TelemetryService } from "@roo-code/telemetry"
Expand Down Expand Up @@ -32,12 +39,21 @@ export class GeminiEmbedder implements IEmbedder {
// Use provided model or default
this.modelId = modelId || GeminiEmbedder.DEFAULT_MODEL

// Get model-specific configuration for gemini-embedding-001
const isGeminiEmbedding001 = this.modelId === "gemini-embedding-001"
const maxBatchTokens = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_MAX_BATCH_TOKENS : MAX_BATCH_TOKENS
const retryDelayMs = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_RETRY_DELAY_MS : INITIAL_RETRY_DELAY_MS
const maxBatchSize = isGeminiEmbedding001 ? GEMINI_EMBEDDING_001_MAX_BATCH_SIZE : undefined

// Create an OpenAI Compatible embedder with Gemini's configuration
this.openAICompatibleEmbedder = new OpenAICompatibleEmbedder(
GeminiEmbedder.GEMINI_BASE_URL,
apiKey,
this.modelId,
GEMINI_MAX_ITEM_TOKENS,
maxBatchTokens,
retryDelayMs,
maxBatchSize,
)
}

Expand Down
32 changes: 29 additions & 3 deletions src/services/code-index/embedders/openai-compatible.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,29 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
private readonly apiKey: string
private readonly isFullUrl: boolean
private readonly maxItemTokens: number
private readonly maxBatchTokens: number
private readonly retryDelayMs: number
private readonly maxBatchSize?: number

/**
* Creates a new OpenAI Compatible embedder
* @param baseUrl The base URL for the OpenAI-compatible API endpoint
* @param apiKey The API key for authentication
* @param modelId Optional model identifier (defaults to "text-embedding-3-small")
* @param maxItemTokens Optional maximum tokens per item (defaults to MAX_ITEM_TOKENS)
* @param maxBatchTokens Optional maximum tokens per batch (defaults to MAX_BATCH_TOKENS)
* @param retryDelayMs Optional initial retry delay in milliseconds (defaults to INITIAL_DELAY_MS)
* @param maxBatchSize Optional maximum number of items per batch
*/
constructor(baseUrl: string, apiKey: string, modelId?: string, maxItemTokens?: number) {
constructor(
baseUrl: string,
apiKey: string,
modelId?: string,
maxItemTokens?: number,
maxBatchTokens?: number,
retryDelayMs?: number,
maxBatchSize?: number,
) {
if (!baseUrl) {
throw new Error(t("embeddings:validation.baseUrlRequired"))
}
Expand All @@ -63,6 +77,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
// Cache the URL type check for performance
this.isFullUrl = this.isFullEndpointUrl(baseUrl)
this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
this.maxBatchTokens = maxBatchTokens || MAX_BATCH_TOKENS
this.retryDelayMs = retryDelayMs || INITIAL_DELAY_MS
this.maxBatchSize = maxBatchSize
}

/**
Expand Down Expand Up @@ -124,7 +141,10 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
continue
}

if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) {
if (
currentBatchTokens + itemTokens <= this.maxBatchTokens &&
(!this.maxBatchSize || currentBatch.length < this.maxBatchSize)
) {
currentBatch.push(text)
currentBatchTokens += itemTokens
processedIndices.push(i)
Expand All @@ -143,6 +163,12 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
allEmbeddings.push(...batchResult.embeddings)
usage.promptTokens += batchResult.usage.promptTokens
usage.totalTokens += batchResult.usage.totalTokens

// Add delay between batches if there are more batches to process
// This helps with rate limiting, especially for gemini-embedding-001
if (remainingTexts.length > 0 && this.retryDelayMs > INITIAL_DELAY_MS) {
await new Promise((resolve) => setTimeout(resolve, this.retryDelayMs / 4))
}
}
}

Expand Down Expand Up @@ -299,7 +325,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
// Check if it's a rate limit error
const httpError = error as HttpError
if (httpError?.status === 429 && hasMoreAttempts) {
const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
const delayMs = this.retryDelayMs * Math.pow(2, attempts)
console.warn(
t("embeddings:rateLimitRetry", {
delayMs,
Expand Down