
Commit 43562bb

feat: Enhance Gemini embedder with token limit handling and model profile support
1 parent d98378f commit 43562bb

3 files changed: 172 additions & 44 deletions


src/services/code-index/constants/index.ts

Lines changed: 3 additions & 0 deletions
@@ -23,3 +23,6 @@ export const PARSING_CONCURRENCY = 10
 export const MAX_BATCH_TOKENS = 100000
 export const MAX_ITEM_TOKENS = 8191
 export const BATCH_PROCESSING_CONCURRENCY = 10
+
+/** Gemini Embedder */
+export const GEMINI_RATE_LIMIT_DELAY_MS = 6000
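
This constant is consumed by the embedder change below as a proactive pause between consecutive embedding batches. A minimal sketch of that pattern, assuming a local sleep helper and an embedInBatches driver that exist only for illustration:

const GEMINI_RATE_LIMIT_DELAY_MS = 6000

function sleep(ms: number): Promise<void> {
	return new Promise((resolve) => setTimeout(resolve, ms))
}

async function embedInBatches(
	batches: string[][],
	embedBatch: (batch: string[]) => Promise<number[][]>,
): Promise<number[][]> {
	const results: number[][] = []
	for (let i = 0; i < batches.length; i++) {
		// Pause before every batch except the first, mirroring the embedder's proactive delay.
		if (i > 0) await sleep(GEMINI_RATE_LIMIT_DELAY_MS)
		results.push(...(await embedBatch(batches[i])))
	}
	return results
}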

src/services/code-index/embedders/gemini.ts

Lines changed: 158 additions & 41 deletions
@@ -1,7 +1,8 @@
 import { ApiHandlerOptions } from "../../../shared/api"
 import { EmbedderInfo, EmbeddingResponse, IEmbedder } from "../interfaces"
 import { GeminiHandler } from "../../../api/providers/gemini"
-
+import { EMBEDDING_MODEL_PROFILES } from "../../../shared/embeddingModels"
+import { GEMINI_RATE_LIMIT_DELAY_MS, MAX_BATCH_RETRIES, INITIAL_RETRY_DELAY_MS } from "../constants"
 /**
  * Implements the IEmbedder interface using Google Gemini's embedding API.
  */
@@ -25,67 +26,183 @@ export class CodeIndexGeminiEmbedder extends GeminiHandler implements IEmbedder
 	 * @param model - Optional model ID to override the default.
 	 * @returns A promise that resolves to an EmbeddingResponse containing the embeddings and usage data.
 	 */
+	// Removed async keyword from the method signature as it no longer uses await at the top level.
+	// It constructs and returns a promise.
 	async createEmbeddings(texts: string[], model?: string): Promise<EmbeddingResponse> {
 		try {
-			// Use the enhanced GeminiHandler to generate embeddings
-			const result = await this.generateEmbeddings(texts, model, this.defaultTaskType)
-
+			const modelId = model || this.defaultModelId
+			const result = await this.embedWithTokenLimit(texts, modelId, this.defaultTaskType)
 			return {
 				embeddings: result.embeddings,
-				usage: result.usage,
 			}
 		} catch (error: any) {
-			// Log the original error for debugging purposes
-			console.error("Gemini embedding failed:", error)
-			// Re-throw a more specific error for the caller
-			throw new Error(`Gemini embedding failed: ${error.message}`)
+			console.error("Gemini embedding task failed:", error)
+			throw error
 		}
 	}
 
 	/**
-	 * Generates embeddings for the provided texts using Gemini API
+	 * Embeds texts while respecting the token limit of the model.
+	 * Splits the input texts into batches that don't exceed the model's token limit.
+	 * Also adds a delay between requests to respect Gemini's rate limits.
+	 *
 	 * @param texts - Array of text strings to create embeddings for
-	 * @param model - Optional model ID to use for embeddings, defaults to the configured default model
-	 * @param taskType - The task type to optimize embeddings for (e.g., 'CODE_RETRIEVAL_QUERY')
-	 * @returns Promise resolving to an EmbeddingResponse with the embeddings and usage data
+	 * @param model - Model ID to use for embeddings
+	 * @param taskType - The task type to optimize embeddings for
+	 * @returns Promise resolving to an object with embeddings and usage data
 	 */
-	private async generateEmbeddings(
+	private async embedWithTokenLimit(
 		texts: string[],
-		model?: string,
-		taskType: string = "CODE_RETRIEVAL_QUERY",
+		model: string,
+		taskType: string,
	): Promise<{
 		embeddings: number[][]
-		usage?: {
-			promptTokens: number
-			totalTokens: number
-		}
+		usage: { promptTokens: number; totalTokens: number }
 	}> {
-		try {
-			const modelId = model || this.defaultModelId
+		// Get the model profile
+		const geminiProfiles = EMBEDDING_MODEL_PROFILES.gemini || {}
+		const modelProfile = geminiProfiles[model]
+
+		// Default max tokens if not specified in the profile
+		const maxInputTokens = modelProfile?.maxInputTokens || 8192
 
-			// Use batchEmbedContents for multiple texts
-			const response = await this.client.models.embedContent({
-				model: modelId,
-				contents: texts,
-				config: {
-					taskType,
-				},
-			})
-
-			if (!response.embeddings) {
-				throw new Error("No embeddings returned from Gemini API")
+		// Initialize result arrays
+		const allEmbeddings: number[][] = []
+		const aggregatedUsage = { promptTokens: 0, totalTokens: 0 }
+
+		// Process texts in batches
+		const remainingTexts = [...texts]
+		let isFirstBatch = true // Initialize isFirstBatch
+
+		while (remainingTexts.length > 0) {
+			const currentBatch: string[] = []
+			let currentBatchTokens = 0
+			const processedIndices: number[] = []
+
+			// Simple token estimation (4 chars ≈ 1 token)
+			for (let i = 0; i < remainingTexts.length; i++) {
+				const text = remainingTexts[i]
+				// Estimate tokens (similar to OpenAI's implementation)
+				const estimatedTokens = Math.ceil(text.length / 4)
+
+				// Skip texts that exceed the max token limit for a single item
+				if (estimatedTokens > maxInputTokens) {
+					console.warn(
+						`Text at index ${i} exceeds maximum token limit (${estimatedTokens} > ${maxInputTokens}). Skipping.`,
+					)
+					processedIndices.push(i)
+					continue
+				}
+
+				// Add text to batch if it fits within the token limit
+				if (currentBatchTokens + estimatedTokens <= maxInputTokens) {
+					currentBatch.push(text)
+					currentBatchTokens += estimatedTokens
+					processedIndices.push(i)
+				} else {
+					// This text would exceed the limit, so process the current batch first
+					break
+				}
 			}
 
-			const embeddings = response.embeddings
-				.map((embedding) => embedding?.values)
-				.filter((values) => values !== undefined && values.length > 0) as number[][]
-			return { embeddings }
-		} catch (error) {
-			if (error instanceof Error) {
-				throw new Error(`Gemini embeddings error: ${error.message}`)
+			// Remove processed texts from the remaining texts
+			for (let i = processedIndices.length - 1; i >= 0; i--) {
+				remainingTexts.splice(processedIndices[i], 1)
+			}
+
+			// Process the current batch if not empty
+			if (currentBatch.length > 0) {
+				// Add proactive delay for rate limiting, except for the very first batch.
+				if (!isFirstBatch) {
+					console.log(`Adding proactive delay of ${GEMINI_RATE_LIMIT_DELAY_MS}ms before Gemini batch`)
+					await new Promise((resolve) => setTimeout(resolve, GEMINI_RATE_LIMIT_DELAY_MS))
+				}
+				isFirstBatch = false // Set to false after the first potential delay or first run
+
+				try {
+					const batchResult = await this._embedBatchWithRetries(currentBatch, model, taskType)
+					allEmbeddings.push(...batchResult.embeddings)
+					aggregatedUsage.promptTokens += batchResult.usage.promptTokens
+					aggregatedUsage.totalTokens += batchResult.usage.totalTokens
+				} catch (error) {
+					console.error("Failed to process batch with retries:", error)
+					throw new Error(`Failed to create embeddings for batch: ${(error as Error).message}`)
+				}
+			}
+		}
+
+		return { embeddings: allEmbeddings, usage: aggregatedUsage }
+	}
+
+	/**
+	 * Helper method to handle batch embedding with retries and exponential backoff for Gemini.
+	 * @param batchTexts Array of texts to embed in this batch
+	 * @param model Model identifier to use
+	 * @param taskType The task type for the embedding
+	 * @returns Promise resolving to embeddings and usage statistics
+	 */
+	private async _embedBatchWithRetries(
+		batchTexts: string[],
+		model: string,
+		taskType: string,
+	): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
+		const modelId = model || this.defaultModelId
+		let lastError: any = null
+
+		for (let attempts = 0; attempts < MAX_BATCH_RETRIES; attempts++) {
+			try {
+				const response = await this.client.models.embedContent({
+					model: modelId,
+					contents: batchTexts,
+					config: {
+						taskType,
+					},
+				})
+
+				if (!response.embeddings) {
+					throw new Error("No embeddings returned from Gemini API")
+				}
+
+				const embeddings = response.embeddings
+					.map((embedding) => embedding?.values)
+					.filter((values) => values !== undefined && values.length > 0) as number[][]
+
+				// Gemini API for embeddings doesn't directly return token usage per call in the same way some others do.
+				// The `generateEmbeddings` in the original file didn't populate usage.
+				// If usage needs to be calculated, it would require a separate token counting call.
+				// For now, returning empty usage, consistent with the original generateEmbeddings.
+				return {
+					embeddings,
+					usage: { promptTokens: 0, totalTokens: 0 }, // Placeholder usage
+				}
+			} catch (error: any) {
+				lastError = error
+				// Basic check for retryable errors (e.g., rate limits)
+				// Gemini might use 429 or specific error messages like "RESOURCE_EXHAUSTED" or "rate limit exceeded"
+				const isRateLimitError =
+					error?.status === 429 ||
+					(error?.message &&
+						(error.message.includes("rate limit") || error.message.includes("RESOURCE_EXHAUSTED")))
+
+				const hasMoreAttempts = attempts < MAX_BATCH_RETRIES - 1
+
+				if (isRateLimitError && hasMoreAttempts) {
+					const delayMs = INITIAL_RETRY_DELAY_MS * Math.pow(2, attempts)
+					console.warn(
+						`Gemini embedding attempt ${attempts + 1} failed due to rate limit. Retrying in ${delayMs}ms...`,
+					)
+					await new Promise((resolve) => setTimeout(resolve, delayMs))
+					continue
+				}
+				// Non-retryable error or last attempt failed
+				console.error(`Gemini embedding failed on attempt ${attempts + 1}:`, error)
+				throw error // Re-throw the last error if not retryable or out of attempts
 			}
-			throw error
 		}
+		// Should not be reached if throw error in loop works correctly, but as a fallback:
+		throw new Error(
+			`Failed to create embeddings for batch after ${MAX_BATCH_RETRIES} attempts. Last error: ${lastError?.message}`,
+		)
 	}
 
 	get embedderInfo(): EmbedderInfo {
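
The core of the new embedWithTokenLimit logic is a greedy grouping of texts by an estimated token count (roughly 4 characters per token), capped at the model's maxInputTokens, with oversized items skipped. A standalone sketch of that rule, with splitByEstimatedTokens as a hypothetical helper written only to mirror the diff above:

function splitByEstimatedTokens(texts: string[], maxInputTokens: number): string[][] {
	const batches: string[][] = []
	let current: string[] = []
	let currentTokens = 0

	for (const text of texts) {
		const estimatedTokens = Math.ceil(text.length / 4)
		// Oversized items are dropped, matching the console.warn + skip behaviour in the diff.
		if (estimatedTokens > maxInputTokens) continue
		// Start a new batch when the next text would push the estimate over the limit.
		if (currentTokens + estimatedTokens > maxInputTokens && current.length > 0) {
			batches.push(current)
			current = []
			currentTokens = 0
		}
		current.push(text)
		currentTokens += estimatedTokens
	}
	if (current.length > 0) batches.push(current)
	return batches
}

// Example: with an 8192-token limit, two ~20k-character strings (~5000 estimated tokens each)
// no longer fit together, so they land in separate batches.
const batches = splitByEstimatedTokens(["a".repeat(20000), "b".repeat(20000)], 8192)
console.log(batches.length) // 2

On rate-limit failures (HTTP 429 or messages containing "rate limit"/"RESOURCE_EXHAUSTED"), _embedBatchWithRetries backs off exponentially, waiting INITIAL_RETRY_DELAY_MS * 2^attempts before each retry, up to MAX_BATCH_RETRIES attempts.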

src/shared/embeddingModels.ts

Lines changed: 11 additions & 3 deletions
@@ -6,6 +6,14 @@ export type EmbedderProvider = "openai" | "ollama" | "gemini" // Add other provi
 
 export interface EmbeddingModelProfile {
 	dimension: number
+	/**
+	 * Specific dimensions supported by the model
+	 */
+	supportDimensions?: number[]
+	/**
+	 * Optional maximum input tokens for the model.
+	 */
+	maxInputTokens?: number
 	// Add other model-specific properties if needed, e.g., context window size
 }
 
@@ -30,9 +38,9 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = {
 		// 'default': { dimension: 768 } // Assuming a default dimension
 	},
 	gemini: {
-		"gemini-embedding-exp-03-07": { dimension: 3072 },
-		"models/text-embedding-004": { dimension: 768 },
-		"models/embedding-001": { dimension: 768 },
+		"gemini-embedding-exp-03-07": { dimension: 3072, supportDimensions: [3072, 1536, 768], maxInputTokens: 8192 },
+		"models/text-embedding-004": { dimension: 768, maxInputTokens: 2048 },
+		"models/embedding-001": { dimension: 768, maxInputTokens: 2048 },
 	},
 }
3846
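The new optional profile fields are what the embedder reads to size its batches. A minimal sketch of that lookup, assuming a hypothetical maxTokensFor helper and using one profile from the table above:

interface EmbeddingModelProfile {
	dimension: number
	supportDimensions?: number[]
	maxInputTokens?: number
}

const geminiProfiles: Record<string, EmbeddingModelProfile> = {
	"models/text-embedding-004": { dimension: 768, maxInputTokens: 2048 },
}

// Fall back to 8192 when a model has no explicit limit, mirroring the embedder's default.
function maxTokensFor(model: string): number {
	return geminiProfiles[model]?.maxInputTokens ?? 8192
}

console.log(maxTokensFor("models/text-embedding-004")) // 2048
console.log(maxTokensFor("unknown-model")) // 8192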
