import { ApiHandlerOptions } from "../../../shared/api"
import { EmbedderInfo, EmbeddingResponse, IEmbedder } from "../interfaces"
import { GeminiHandler } from "../../../api/providers/gemini"
-
+ import { EMBEDDING_MODEL_PROFILES } from "../../../shared/embeddingModels"
+ import { GEMINI_RATE_LIMIT_DELAY_MS, MAX_BATCH_RETRIES, INITIAL_RETRY_DELAY_MS } from "../constants"
/**
 * Implements the IEmbedder interface using Google Gemini's embedding API.
 */
@@ -25,67 +26,183 @@ export class CodeIndexGeminiEmbedder extends GeminiHandler implements IEmbedder
	 * @param model - Optional model ID to override the default.
	 * @returns A promise that resolves to an EmbeddingResponse containing the embeddings and usage data.
	 */
	async createEmbeddings(texts: string[], model?: string): Promise<EmbeddingResponse> {
		try {
- 			// Use the enhanced GeminiHandler to generate embeddings
- 			const result = await this.generateEmbeddings(texts, model, this.defaultTaskType)
-
+ 			const modelId = model || this.defaultModelId
+ 			const result = await this.embedWithTokenLimit(texts, modelId, this.defaultTaskType)
			return {
				embeddings: result.embeddings,
- 				usage: result.usage,
			}
		} catch (error: any) {
- 			// Log the original error for debugging purposes
- 			console.error("Gemini embedding failed:", error)
- 			// Re-throw a more specific error for the caller
- 			throw new Error(`Gemini embedding failed: ${error.message}`)
+ 			console.error("Gemini embedding task failed:", error)
+ 			throw error
		}
	}
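+
+ 	// Illustrative usage (hypothetical caller; the real wiring lives in the code-index service):
+ 	//   const { embeddings } = await embedder.createEmbeddings(["function add(a, b) { return a + b }"])
+ 	//   // embeddings[0] holds the vector for the single input chunk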

	/**
- 	 * Generates embeddings for the provided texts using Gemini API
+ 	 * Embeds texts while respecting the token limit of the model.
+ 	 * Splits the input texts into batches that don't exceed the model's token limit.
+ 	 * Also adds a delay between requests to respect Gemini's rate limits.
+ 	 *
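+ 	 * For example, with the default 8,192-token limit, texts estimated at 5,000, 4,000 and 2,000 tokens
+ 	 * are sent as two batches: [5,000], then [4,000 and 2,000].
+ 	 *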
	 * @param texts - Array of text strings to create embeddings for
- 	 * @param model - Optional model ID to use for embeddings, defaults to the configured default model
- 	 * @param taskType - The task type to optimize embeddings for (e.g., 'CODE_RETRIEVAL_QUERY')
- 	 * @returns Promise resolving to an EmbeddingResponse with the embeddings and usage data
+ 	 * @param model - Model ID to use for embeddings
+ 	 * @param taskType - The task type to optimize embeddings for
+ 	 * @returns Promise resolving to an object with embeddings and usage data
	 */
- 	private async generateEmbeddings(
+ 	private async embedWithTokenLimit(
		texts: string[],
- 		model?: string,
- 		taskType: string = "CODE_RETRIEVAL_QUERY",
+ 		model: string,
+ 		taskType: string,
	): Promise<{
		embeddings: number[][]
- 		usage?: {
- 			promptTokens: number
- 			totalTokens: number
- 		}
+ 		usage: { promptTokens: number; totalTokens: number }
	}> {
- 		try {
- 			const modelId = model || this.defaultModelId
+ 		// Get the model profile
+ 		const geminiProfiles = EMBEDDING_MODEL_PROFILES.gemini || {}
+ 		const modelProfile = geminiProfiles[model]
+
+ 		// Default max input tokens if not specified in the profile
+ 		const maxInputTokens = modelProfile?.maxInputTokens || 8192

- 			// Use batchEmbedContents for multiple texts
- 			const response = await this.client.models.embedContent({
- 				model: modelId,
- 				contents: texts,
- 				config: {
- 					taskType,
- 				},
- 			})
-
- 			if (!response.embeddings) {
- 				throw new Error("No embeddings returned from Gemini API")
+ 		// Accumulators for the results across all batches
+ 		const allEmbeddings: number[][] = []
+ 		const aggregatedUsage = { promptTokens: 0, totalTokens: 0 }
+
+ 		// Process texts in batches
+ 		const remainingTexts = [...texts]
+ 		let isFirstBatch = true
+
+ 		while (remainingTexts.length > 0) {
+ 			const currentBatch: string[] = []
+ 			let currentBatchTokens = 0
+ 			const processedIndices: number[] = []
+
+ 			// Simple token estimation (4 chars ≈ 1 token)
+ 			for (let i = 0; i < remainingTexts.length; i++) {
+ 				const text = remainingTexts[i]
+ 				// Estimate tokens (similar to OpenAI's implementation)
+ 				const estimatedTokens = Math.ceil(text.length / 4)
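+ 				// e.g. a 2,000-character chunk is estimated at Math.ceil(2000 / 4) = 500 tokens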
+
+ 				// Skip texts that exceed the max token limit for a single item
+ 				if (estimatedTokens > maxInputTokens) {
+ 					console.warn(
+ 						`Text at index ${i} exceeds maximum token limit (${estimatedTokens} > ${maxInputTokens}). Skipping.`,
+ 					)
+ 					processedIndices.push(i)
+ 					continue
+ 				}
+
+ 				// Add text to batch if it fits within the token limit
+ 				if (currentBatchTokens + estimatedTokens <= maxInputTokens) {
+ 					currentBatch.push(text)
+ 					currentBatchTokens += estimatedTokens
+ 					processedIndices.push(i)
+ 				} else {
+ 					// This text would exceed the limit, so process the current batch first
+ 					break
+ 				}
			}

- 			const embeddings = response.embeddings
- 				.map((embedding) => embedding?.values)
- 				.filter((values) => values !== undefined && values.length > 0) as number[][]
- 			return { embeddings }
- 		} catch (error) {
- 			if (error instanceof Error) {
- 				throw new Error(`Gemini embeddings error: ${error.message}`)
+ 			// Remove processed texts from the remaining list, back to front so earlier indices stay valid
+ 			for (let i = processedIndices.length - 1; i >= 0; i--) {
+ 				remainingTexts.splice(processedIndices[i], 1)
+ 			}
+
+ 			// Process the current batch if it is not empty
+ 			if (currentBatch.length > 0) {
+ 				// Add a proactive delay for rate limiting, except before the very first batch
+ 				if (!isFirstBatch) {
+ 					console.log(`Adding proactive delay of ${GEMINI_RATE_LIMIT_DELAY_MS}ms before Gemini batch`)
+ 					await new Promise((resolve) => setTimeout(resolve, GEMINI_RATE_LIMIT_DELAY_MS))
+ 				}
+ 				isFirstBatch = false
+
+ 				try {
+ 					const batchResult = await this._embedBatchWithRetries(currentBatch, model, taskType)
+ 					allEmbeddings.push(...batchResult.embeddings)
+ 					aggregatedUsage.promptTokens += batchResult.usage.promptTokens
+ 					aggregatedUsage.totalTokens += batchResult.usage.totalTokens
+ 				} catch (error) {
+ 					console.error("Failed to process batch with retries:", error)
+ 					throw new Error(`Failed to create embeddings for batch: ${(error as Error).message}`)
+ 				}
+ 			}
+ 		}
+
+ 		return { embeddings: allEmbeddings, usage: aggregatedUsage }
+ 	}
+
+ 	/**
+ 	 * Helper method to handle batch embedding with retries and exponential backoff for Gemini.
+ 	 * @param batchTexts - Array of texts to embed in this batch
+ 	 * @param model - Model identifier to use
+ 	 * @param taskType - The task type for the embedding
+ 	 * @returns Promise resolving to embeddings and usage statistics
+ 	 */
+ 	private async _embedBatchWithRetries(
+ 		batchTexts: string[],
+ 		model: string,
+ 		taskType: string,
+ 	): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
+ 		const modelId = model || this.defaultModelId
+ 		let lastError: any = null
+
+ 		for (let attempts = 0; attempts < MAX_BATCH_RETRIES; attempts++) {
+ 			try {
+ 				const response = await this.client.models.embedContent({
+ 					model: modelId,
+ 					contents: batchTexts,
+ 					config: {
+ 						taskType,
+ 					},
+ 				})
+
+ 				if (!response.embeddings) {
+ 					throw new Error("No embeddings returned from Gemini API")
+ 				}
+
+ 				const embeddings = response.embeddings
+ 					.map((embedding) => embedding?.values)
+ 					.filter((values) => values !== undefined && values.length > 0) as number[][]
+
+ 				// The Gemini embeddings API does not return token usage per call the way some other providers do,
+ 				// and the previous generateEmbeddings implementation never populated usage either.
+ 				// Calculating real usage would require a separate token counting call,
+ 				// so placeholder usage is returned here, consistent with the previous behaviour.
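+ 				// (Assumption, not implemented here: such a counting call might use the SDK's models.countTokens,
+ 				// e.g. this.client.models.countTokens({ model: modelId, contents: batchTexts }), at the cost of an extra request per batch.)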
+ 				return {
+ 					embeddings,
+ 					usage: { promptTokens: 0, totalTokens: 0 }, // Placeholder usage
+ 				}
+ 			} catch (error: any) {
+ 				lastError = error
+ 				// Basic check for retryable errors (e.g., rate limits)
+ 				// Gemini might use 429 or specific error messages like "RESOURCE_EXHAUSTED" or "rate limit exceeded"
+ 				const isRateLimitError =
+ 					error?.status === 429 ||
+ 					(error?.message &&
+ 						(error.message.includes("rate limit") || error.message.includes("RESOURCE_EXHAUSTED")))
+
+ 				const hasMoreAttempts = attempts < MAX_BATCH_RETRIES - 1
+
+ 				if (isRateLimitError && hasMoreAttempts) {
+ 					const delayMs = INITIAL_RETRY_DELAY_MS * Math.pow(2, attempts)
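+ 					// Hypothetical progression: if INITIAL_RETRY_DELAY_MS were 500 ms, retries would wait 500 ms, 1 s, then 2 s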
+ 					console.warn(
+ 						`Gemini embedding attempt ${attempts + 1} failed due to rate limit. Retrying in ${delayMs}ms...`,
+ 					)
+ 					await new Promise((resolve) => setTimeout(resolve, delayMs))
+ 					continue
+ 				}
+ 				// Non-retryable error, or the last attempt has failed
+ 				console.error(`Gemini embedding failed on attempt ${attempts + 1}:`, error)
+ 				throw error // Re-throw if the error is not retryable or attempts are exhausted
			}
- 			throw error
		}
+ 		// Unreachable as long as the loop always returns or throws, but kept as a safety net:
+ 		throw new Error(
+ 			`Failed to create embeddings for batch after ${MAX_BATCH_RETRIES} attempts. Last error: ${lastError?.message}`,
+ 		)
	}

	get embedderInfo(): EmbedderInfo {