Skip to content

Commit ea054e8

Browse files
committed
feat: implement dynamic retry timing for Google Gemini API
- Enhanced error handling in GeminiHandler to preserve original error structure
- Added logic to distinguish between rate limits and quota exhaustion
- Preserved retry delay information from Gemini API responses
- Added new localized error messages for better user feedback
- Added comprehensive tests for the new retry logic

Fixes #6680
1 parent 7ca4901 commit ea054e8

File tree

3 files changed

+139
-3
lines changed

3 files changed

+139
-3
lines changed

src/api/providers/__tests__/gemini.spec.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,67 @@ describe("GeminiHandler", () => {
102102
}
103103
}).rejects.toThrow()
104104
})
105+
106+
// Streaming path: a rejection with status 429 and a google.rpc.RetryInfo detail
// must make iteration of the createMessage() stream reject. The expected message
// is whatever t("common:errors.gemini.rate_limit") evaluates to in this test file.
it("should handle rate limit errors with retry information", async () => {
107+
const mockError: any = new Error("Rate limit exceeded")
108+
mockError.status = 429
109+
// Retry hint attached to the error; this test does not assert on the delay value itself.
mockError.errorDetails = [
110+
{
111+
"@type": "type.googleapis.com/google.rpc.RetryInfo",
112+
retryDelay: "40s",
113+
},
114+
]
115+
// Make the mocked generateContentStream reject with the 429 error built above.
;(handler["client"].models.generateContentStream as any).mockRejectedValue(mockError)
116+
117+
const stream = handler.createMessage(systemPrompt, mockMessages)
118+
119+
// The stream is consumed inside the async callback so the rejection is observed by expect().
await expect(async () => {
120+
for await (const _chunk of stream) {
121+
// Should throw before yielding any chunks
122+
}
123+
}).rejects.toThrow(t("common:errors.gemini.rate_limit"))
124+
})
125+
126+
// Streaming path: a 429 whose message mentions "quota"/"billing" (and carries no
// errorDetails) must make iteration of the createMessage() stream reject with the
// message produced by t("common:errors.gemini.quota_exhausted").
it("should handle quota exhaustion errors", async () => {
127+
const mockError: any = new Error(
128+
"You exceeded your current quota, please check your plan and billing details",
129+
)
130+
mockError.status = 429
131+
// NOTE(review): this assigns the same string already passed to the Error constructor above,
// so the assignment does not change the message.
mockError.message = "You exceeded your current quota, please check your plan and billing details"
132+
// Make the mocked generateContentStream reject with the quota error.
;(handler["client"].models.generateContentStream as any).mockRejectedValue(mockError)
133+
134+
const stream = handler.createMessage(systemPrompt, mockMessages)
135+
136+
await expect(async () => {
137+
for await (const _chunk of stream) {
138+
// Should throw before yielding any chunks
139+
}
140+
}).rejects.toThrow(t("common:errors.gemini.quota_exhausted"))
141+
})
142+
143+
// Streaming path: checks that the error thrown while iterating the createMessage()
// stream still exposes `status` and `errorDetails[0].retryDelay` from the mocked
// 429 rejection.
// NOTE(review): every expect() below sits inside the catch block. If iterating the
// stream does not throw, no assertion runs and the test still passes.
it("should preserve error details for retry logic", async () => {
144+
const mockError: any = new Error("Rate limit exceeded")
145+
mockError.status = 429
146+
mockError.errorDetails = [
147+
{
148+
"@type": "type.googleapis.com/google.rpc.RetryInfo",
149+
retryDelay: "60s",
150+
},
151+
]
152+
// Make the mocked generateContentStream reject with the 429 error built above.
;(handler["client"].models.generateContentStream as any).mockRejectedValue(mockError)
153+
154+
const stream = handler.createMessage(systemPrompt, mockMessages)
155+
156+
try {
157+
for await (const _chunk of stream) {
158+
// Should throw before yielding any chunks
159+
}
160+
} catch (error: any) {
161+
// The caught error is expected to carry the same status and retry delay as mockError.
expect(error.status).toBe(429)
162+
expect(error.errorDetails).toBeDefined()
163+
expect(error.errorDetails[0].retryDelay).toBe("60s")
164+
}
165+
})
105166
})
106167

107168
describe("completePrompt", () => {
@@ -134,6 +195,33 @@ describe("GeminiHandler", () => {
134195
)
135196
})
136197

198+
// Non-streaming path: when the mocked generateContent rejects with status 429 and a
// google.rpc.RetryInfo detail, completePrompt() must reject with the message produced
// by t("common:errors.gemini.rate_limit").
it("should handle rate limit errors in completePrompt", async () => {
199+
const mockError: any = new Error("Rate limit exceeded")
200+
mockError.status = 429
201+
// Retry hint attached to the error; this test does not assert on the delay value itself.
mockError.errorDetails = [
202+
{
203+
"@type": "type.googleapis.com/google.rpc.RetryInfo",
204+
retryDelay: "30s",
205+
},
206+
]
207+
;(handler["client"].models.generateContent as any).mockRejectedValue(mockError)
208+
209+
await expect(handler.completePrompt("Test prompt")).rejects.toThrow(t("common:errors.gemini.rate_limit"))
210+
})
211+
212+
// Non-streaming path: when the mocked generateContent rejects with a 429 whose message
// mentions "quota"/"billing", completePrompt() must reject with the message produced
// by t("common:errors.gemini.quota_exhausted").
it("should handle quota exhaustion errors in completePrompt", async () => {
213+
const mockError: any = new Error(
214+
"You exceeded your current quota, please check your plan and billing details",
215+
)
216+
mockError.status = 429
217+
// NOTE(review): this assigns the same string already passed to the Error constructor above,
// so the assignment does not change the message.
mockError.message = "You exceeded your current quota, please check your plan and billing details"
218+
;(handler["client"].models.generateContent as any).mockRejectedValue(mockError)
219+
220+
await expect(handler.completePrompt("Test prompt")).rejects.toThrow(
221+
t("common:errors.gemini.quota_exhausted"),
222+
)
223+
})
224+
137225
it("should handle empty response", async () => {
138226
// Mock the response with empty text
139227
;(handler["client"].models.generateContent as any).mockResolvedValue({

src/api/providers/gemini.ts

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,30 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
153153
totalCost: this.calculateCost({ info, inputTokens, outputTokens, cacheReadTokens }),
154154
}
155155
}
156-
} catch (error) {
156+
} catch (error: any) {
157+
// Preserve the original error structure for retry logic
158+
// HTTP 429 branch: builds a new Error with a localized message, copies `status`,
// `errorDetails` and `message` from the caught error onto it, and throws it.
if (error.status === 429) {
159+
// Check if this is a rate limit or quota exhaustion
160+
const errorMessage = error.message || ""
161+
// Classification is a case-sensitive substring match on the error message.
const isQuotaExhausted = errorMessage.includes("quota") || errorMessage.includes("billing")
162+
163+
// Create an enhanced error that preserves the original structure
164+
const enhancedError = new Error(
165+
isQuotaExhausted
166+
? t("common:errors.gemini.quota_exhausted", { error: errorMessage })
167+
: t("common:errors.gemini.rate_limit", { error: errorMessage }),
168+
)
169+
170+
// Preserve the original error properties for retry logic
171+
// NOTE(review): `message` is included in the copied properties, so this assignment
// replaces the localized text passed to `new Error(...)` above with the original
// `error.message` (or `undefined` if the caught error has none). Callers matching on
// the localized message would not see it — confirm whether copying `message` is intended.
Object.assign(enhancedError, {
172+
status: error.status,
173+
errorDetails: error.errorDetails,
174+
message: error.message,
175+
})
176+
177+
throw enhancedError
178+
}
179+
157180
if (error instanceof Error) {
158181
throw new Error(t("common:errors.gemini.generate_stream", { error: error.message }))
159182
}
@@ -235,7 +258,30 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
235258
}
236259

237260
return text
238-
} catch (error) {
261+
} catch (error: any) {
262+
// Preserve the original error structure for retry logic
263+
// HTTP 429 branch: builds a new Error with a localized message, copies `status`,
// `errorDetails` and `message` from the caught error onto it, and throws it.
if (error.status === 429) {
264+
// Check if this is a rate limit or quota exhaustion
265+
const errorMessage = error.message || ""
266+
// Classification is a case-sensitive substring match on the error message.
const isQuotaExhausted = errorMessage.includes("quota") || errorMessage.includes("billing")
267+
268+
// Create an enhanced error that preserves the original structure
269+
const enhancedError = new Error(
270+
isQuotaExhausted
271+
? t("common:errors.gemini.quota_exhausted", { error: errorMessage })
272+
: t("common:errors.gemini.rate_limit", { error: errorMessage }),
273+
)
274+
275+
// Preserve the original error properties for retry logic
276+
// NOTE(review): `message` is included in the copied properties, so this assignment
// replaces the localized text passed to `new Error(...)` above with the original
// `error.message` (or `undefined` if the caught error has none). Callers matching on
// the localized message would not see it — confirm whether copying `message` is intended.
Object.assign(enhancedError, {
277+
status: error.status,
278+
errorDetails: error.errorDetails,
279+
message: error.message,
280+
})
281+
282+
throw enhancedError
283+
}
284+
239285
if (error instanceof Error) {
240286
throw new Error(t("common:errors.gemini.generate_complete_prompt", { error: error.message }))
241287
}

src/i18n/locales/en/common.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,9 @@
8989
"gemini": {
9090
"generate_stream": "Gemini generate context stream error: {{error}}",
9191
"generate_complete_prompt": "Gemini completion error: {{error}}",
92-
"sources": "Sources:"
92+
"sources": "Sources:",
93+
"rate_limit": "Gemini API rate limit exceeded. The system will automatically retry with the appropriate delay.",
94+
"quota_exhausted": "Gemini API quota exhausted: {{error}}. Please check your plan and billing details at https://ai.google.dev/gemini-api/docs/rate-limits"
9395
},
9496
"cerebras": {
9597
"authenticationFailed": "Cerebras API authentication failed. Please check your API key is valid and not expired.",

0 commit comments

Comments
 (0)