-
Notifications
You must be signed in to change notification settings - Fork 2.6k
feat: Honor Gemini retryDelay and improve rate limit handling #8013
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,307 @@ | ||
| // npx vitest run src/api/providers/__tests__/gemini-rate-limit.spec.ts | ||
|
|
||
| import { describe, it, expect, vi, beforeEach } from "vitest" | ||
| import { GeminiHandler, GeminiError } from "../gemini" | ||
| import { t } from "i18next" | ||
|
|
||
| describe("GeminiHandler Rate Limit Handling", () => { | ||
| let handler: GeminiHandler | ||
|
|
||
| beforeEach(() => { | ||
| handler = new GeminiHandler({ | ||
| apiModelId: "gemini-1.5-flash", | ||
| geminiApiKey: "test-key", | ||
| }) | ||
| }) | ||
|
|
||
| describe("GeminiError", () => { | ||
| it("should properly construct error with RetryInfo", () => { | ||
| const error = new GeminiError("Rate limit exceeded", { | ||
| status: 429, | ||
| error: { | ||
| status: "RESOURCE_EXHAUSTED", | ||
| message: "Too many requests", | ||
| details: [ | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo", | ||
| retryDelay: "59s", | ||
| }, | ||
| ], | ||
| }, | ||
| }) | ||
|
|
||
| expect(error.status).toBe(429) | ||
| expect(error.errorStatus).toBe("RESOURCE_EXHAUSTED") | ||
| expect(error.errorDetails).toHaveLength(1) | ||
| expect(error.errorDetails?.[0]).toEqual({ | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo", | ||
| retryDelay: "59s", | ||
| }) | ||
| }) | ||
|
|
||
| it("should properly construct error with QuotaFailure", () => { | ||
| const error = new GeminiError("Quota exceeded", { | ||
| status: 429, | ||
| error: { | ||
| status: "RESOURCE_EXHAUSTED", | ||
| message: "Quota exceeded", | ||
| details: [ | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.QuotaFailure", | ||
| violations: [ | ||
| { | ||
| subject: "tokens_per_minute", | ||
| description: "Token limit exceeded for model", | ||
| }, | ||
| ], | ||
| }, | ||
| ], | ||
| }, | ||
| }) | ||
|
|
||
| expect(error.status).toBe(429) | ||
| expect(error.errorStatus).toBe("RESOURCE_EXHAUSTED") | ||
| expect(error.errorDetails).toHaveLength(1) | ||
| expect(error.errorDetails?.[0]).toEqual({ | ||
| "@type": "type.googleapis.com/google.rpc.QuotaFailure", | ||
| violations: [ | ||
| { | ||
| subject: "tokens_per_minute", | ||
| description: "Token limit exceeded for model", | ||
| }, | ||
| ], | ||
| }) | ||
| }) | ||
|
|
||
| it("should handle both RetryInfo and QuotaFailure in same error", () => { | ||
| const error = new GeminiError("Rate limit with retry", { | ||
| status: 429, | ||
| error: { | ||
| status: "RESOURCE_EXHAUSTED", | ||
| message: "Rate limit exceeded", | ||
| details: [ | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo", | ||
| retryDelay: "30s", | ||
| }, | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.QuotaFailure", | ||
| violations: [ | ||
| { | ||
| subject: "requests_per_minute", | ||
| description: "Request limit exceeded", | ||
| }, | ||
| ], | ||
| }, | ||
| ], | ||
| }, | ||
| }) | ||
|
|
||
| expect(error.status).toBe(429) | ||
| expect(error.errorDetails).toHaveLength(2) | ||
|
|
||
| const retryInfo = error.errorDetails?.find( | ||
| (d: any) => d["@type"] === "type.googleapis.com/google.rpc.RetryInfo", | ||
| ) | ||
| expect(retryInfo?.retryDelay).toBe("30s") | ||
|
|
||
| const quotaFailure = error.errorDetails?.find( | ||
| (d: any) => d["@type"] === "type.googleapis.com/google.rpc.QuotaFailure", | ||
| ) | ||
| expect(quotaFailure?.violations?.[0]?.subject).toBe("requests_per_minute") | ||
| }) | ||
|
|
||
| it("should handle error details in errorDetails field (alternative format)", () => { | ||
| const error = new GeminiError("Rate limit", { | ||
| status: 429, | ||
| errorDetails: [ | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo", | ||
| retryDelay: "45s", | ||
| }, | ||
| ], | ||
| }) | ||
|
|
||
| expect(error.status).toBe(429) | ||
| expect(error.errorDetails).toHaveLength(1) | ||
| expect(error.errorDetails?.[0].retryDelay).toBe("45s") | ||
| }) | ||
| }) | ||
|
|
||
| describe("Error transformation in createMessage", () => { | ||
| it("should transform 429 errors with proper structure", async () => { | ||
| // Mock the GoogleGenAI client to throw an error | ||
| const mockError = { | ||
| status: 429, | ||
| message: "Resource exhausted", | ||
| error: { | ||
| status: "RESOURCE_EXHAUSTED", | ||
| details: [ | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo", | ||
| retryDelay: "60s", | ||
| }, | ||
| ], | ||
| }, | ||
| } | ||
|
|
||
| // Mock the client's generateContentStream method | ||
| const mockClient = { | ||
| models: { | ||
| generateContentStream: vi.fn().mockRejectedValue(mockError), | ||
| }, | ||
| } | ||
| ;(handler as any).client = mockClient | ||
|
|
||
| // Attempt to create a message and expect it to throw GeminiError | ||
| try { | ||
| const stream = handler.createMessage("system", [{ role: "user", content: "test" }]) | ||
| // Consume the stream to trigger the error | ||
| for await (const chunk of stream) { | ||
| // This should not be reached | ||
| } | ||
| expect.fail("Should have thrown an error") | ||
| } catch (error) { | ||
| expect(error).toBeInstanceOf(GeminiError) | ||
| const geminiError = error as GeminiError | ||
| expect(geminiError.status).toBe(429) | ||
| expect(geminiError.errorDetails?.[0]).toEqual({ | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo", | ||
| retryDelay: "60s", | ||
| }) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle quota exhaustion errors", async () => { | ||
| const mockError = { | ||
| status: 429, | ||
| message: "Daily quota exceeded", | ||
| error: { | ||
| status: "RESOURCE_EXHAUSTED", | ||
| details: [ | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.QuotaFailure", | ||
| violations: [ | ||
| { | ||
| subject: "daily_quota", | ||
| description: "Daily quota has been exhausted", | ||
| }, | ||
| ], | ||
| }, | ||
| ], | ||
| }, | ||
| } | ||
|
|
||
| const mockClient = { | ||
| models: { | ||
| generateContentStream: vi.fn().mockRejectedValue(mockError), | ||
| }, | ||
| } | ||
| ;(handler as any).client = mockClient | ||
|
|
||
| try { | ||
| const stream = handler.createMessage("system", [{ role: "user", content: "test" }]) | ||
| for await (const chunk of stream) { | ||
| // Should not reach here | ||
| } | ||
| expect.fail("Should have thrown an error") | ||
| } catch (error) { | ||
| expect(error).toBeInstanceOf(GeminiError) | ||
| const geminiError = error as GeminiError | ||
| expect(geminiError.status).toBe(429) | ||
|
|
||
| const quotaFailure = geminiError.errorDetails?.find( | ||
| (d: any) => d["@type"] === "type.googleapis.com/google.rpc.QuotaFailure", | ||
| ) | ||
| expect(quotaFailure?.violations?.[0]?.description).toBe("Daily quota has been exhausted") | ||
| } | ||
| }) | ||
|
|
||
| it("should handle generic errors without status", async () => { | ||
| const mockError = new Error("Network error") | ||
|
|
||
| const mockClient = { | ||
| models: { | ||
| generateContentStream: vi.fn().mockRejectedValue(mockError), | ||
| }, | ||
| } | ||
| ;(handler as any).client = mockClient | ||
|
|
||
| try { | ||
| const stream = handler.createMessage("system", [{ role: "user", content: "test" }]) | ||
| for await (const chunk of stream) { | ||
| // Should not reach here | ||
| } | ||
| expect.fail("Should have thrown an error") | ||
| } catch (error) { | ||
| expect(error).toBeInstanceOf(GeminiError) | ||
| const geminiError = error as GeminiError | ||
| // The message will be the translated error message | ||
| expect(geminiError.message).toBeDefined() | ||
| expect(geminiError.status).toBeUndefined() | ||
| expect(geminiError.errorDetails).toBeUndefined() | ||
| } | ||
| }) | ||
| }) | ||
|
|
||
| describe("Error transformation in completePrompt", () => { | ||
| it("should transform 429 errors in completePrompt", async () => { | ||
| const mockError = { | ||
| status: 429, | ||
| message: "Rate limit", | ||
| error: { | ||
| status: "RESOURCE_EXHAUSTED", | ||
| details: [ | ||
| { | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo", | ||
| retryDelay: "30s", | ||
| }, | ||
| ], | ||
| }, | ||
| } | ||
|
|
||
| const mockClient = { | ||
| models: { | ||
| generateContent: vi.fn().mockRejectedValue(mockError), | ||
| }, | ||
| } | ||
| ;(handler as any).client = mockClient | ||
|
|
||
| try { | ||
| await handler.completePrompt("test prompt") | ||
| expect.fail("Should have thrown an error") | ||
| } catch (error) { | ||
| expect(error).toBeInstanceOf(GeminiError) | ||
| const geminiError = error as GeminiError | ||
| expect(geminiError.status).toBe(429) | ||
| expect(geminiError.errorDetails?.[0].retryDelay).toBe("30s") | ||
| } | ||
| }) | ||
| }) | ||
|
|
||
| describe("Retry delay parsing", () => { | ||
| it("should correctly parse various delay formats", () => { | ||
| const testCases = [ | ||
| { input: "59s", expected: 59 }, | ||
| { input: "120s", expected: 120 }, | ||
| { input: "1s", expected: 1 }, | ||
| { input: "0s", expected: 0 }, | ||
| ] | ||
|
|
||
| testCases.forEach(({ input, expected }) => { | ||
| const match = input.match(/^(\d+)s$/) | ||
| expect(match).toBeTruthy() | ||
| expect(Number(match![1])).toBe(expected) | ||
| }) | ||
| }) | ||
|
|
||
| it("should not match invalid delay formats", () => { | ||
| const invalidFormats = ["59", "s59", "59m", "59.5s", ""] | ||
|
|
||
| invalidFormats.forEach((format) => { | ||
| const match = format.match(/^(\d+)s$/) | ||
| expect(match).toBeFalsy() | ||
| }) | ||
| }) | ||
| }) | ||
| }) | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,44 @@ import { getModelParams } from "../transform/model-params" | |
| import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" | ||
| import { BaseProvider } from "./base-provider" | ||
|
|
||
| // Error detail types for Gemini API errors | ||
| export interface GeminiRetryInfo { | ||
| "@type": "type.googleapis.com/google.rpc.RetryInfo" | ||
| retryDelay?: string // e.g., "59s" | ||
| } | ||
|
|
||
| export interface GeminiQuotaFailure { | ||
| "@type": "type.googleapis.com/google.rpc.QuotaFailure" | ||
| violations?: Array<{ | ||
| subject?: string | ||
| description?: string | ||
| }> | ||
| } | ||
|
|
||
| export interface GeminiErrorDetails { | ||
| status?: number | ||
| error?: { | ||
| status?: string // e.g., "RESOURCE_EXHAUSTED" | ||
| message?: string | ||
| details?: Array<GeminiRetryInfo | GeminiQuotaFailure | any> | ||
| } | ||
| errorDetails?: Array<GeminiRetryInfo | GeminiQuotaFailure | any> | ||
| } | ||
|
|
||
| export class GeminiError extends Error { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider adding JSDoc comments to document the purpose and structure of this error class. It would help future maintainers understand when and how to use it. |
||
| status?: number | ||
| errorStatus?: string | ||
| errorDetails?: Array<GeminiRetryInfo | GeminiQuotaFailure | any> | ||
|
|
||
| constructor(message: string, details?: GeminiErrorDetails) { | ||
| super(message) | ||
| this.name = "GeminiError" | ||
| this.status = details?.status | ||
| this.errorStatus = details?.error?.status | ||
| this.errorDetails = details?.error?.details || details?.errorDetails | ||
| } | ||
| } | ||
|
|
||
| type GeminiHandlerOptions = ApiHandlerOptions & { | ||
| isVertex?: boolean | ||
| } | ||
|
|
@@ -154,8 +192,20 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl | |
| } | ||
| } | ||
| } catch (error) { | ||
| if (error instanceof Error) { | ||
| throw new Error(t("common:errors.gemini.generate_stream", { error: error.message })) | ||
| // Parse and enhance error information for better handling upstream | ||
| if (error && typeof error === "object" && "status" in error) { | ||
| const errorObj = error as any | ||
| const geminiError = new GeminiError( | ||
| errorObj.message || t("common:errors.gemini.generate_stream", { error: "Unknown error" }), | ||
| { | ||
| status: errorObj.status, | ||
| error: errorObj.error, | ||
| errorDetails: errorObj.errorDetails || errorObj.error?.details, | ||
| }, | ||
| ) | ||
| throw geminiError | ||
| } else if (error instanceof Error) { | ||
| throw new GeminiError(t("common:errors.gemini.generate_stream", { error: error.message })) | ||
| } | ||
|
|
||
| throw error | ||
|
|
@@ -246,8 +296,20 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl | |
|
|
||
| return text | ||
| } catch (error) { | ||
| if (error instanceof Error) { | ||
| throw new Error(t("common:errors.gemini.generate_complete_prompt", { error: error.message })) | ||
| // Parse and enhance error information for better handling upstream | ||
| if (error && typeof error === "object" && "status" in error) { | ||
| const errorObj = error as any | ||
| const geminiError = new GeminiError( | ||
| errorObj.message || t("common:errors.gemini.generate_complete_prompt", { error: "Unknown error" }), | ||
| { | ||
| status: errorObj.status, | ||
| error: errorObj.error, | ||
| errorDetails: errorObj.errorDetails || errorObj.error?.details, | ||
| }, | ||
| ) | ||
| throw geminiError | ||
| } else if (error instanceof Error) { | ||
| throw new GeminiError(t("common:errors.gemini.generate_complete_prompt", { error: error.message })) | ||
| } | ||
|
|
||
| throw error | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great test coverage! Could we add a test case for when retryDelay is "0s" to ensure the buffer is still applied correctly in edge cases?