From 64e6086f611d3009703bf8c308a22b044661c410 Mon Sep 17 00:00:00 2001 From: Shariq Riaz Date: Sun, 24 Aug 2025 02:15:30 +0500 Subject: [PATCH] feat: add Gemini free tier models with -free aliases Add three new free tier Gemini models with 250K context windows: - gemini-2.5-pro-free (5 RPM, 250K TPM, 100 RPD) - gemini-2.5-flash-free (10 RPM, 250K TPM, 250 RPD) - gemini-2.5-flash-lite-free (15 RPM, 250K TPM, 1000 RPD) The provider maps -free aliases to base model IDs for API calls while preserving the 250K context limit and $0 pricing in the UI. Descriptions include rate limiting guidance for users to set appropriate delays in provider settings (12s, 6s, 4s respectively). --- packages/types/src/providers/gemini.ts | 43 ++++++++++++++++++++++++++ src/api/providers/gemini.ts | 10 +++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts index a7225c7330..a773303340 100644 --- a/packages/types/src/providers/gemini.ts +++ b/packages/types/src/providers/gemini.ts @@ -295,4 +295,47 @@ export const geminiModels = { supportsReasoningBudget: true, maxThinkingTokens: 24_576, }, + "gemini-2.5-pro-free": { + maxTokens: 64_000, + contextWindow: 250_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0, + outputPrice: 0, + cacheReadsPrice: 0, + cacheWritesPrice: 0, + maxThinkingTokens: 32_768, + supportsReasoningBudget: true, + requiredReasoningBudget: true, + description: + "Free tier version of Gemini 2.5 Pro with 250K context window and rate limits (5 RPM, 250K TPM, 100 RPD). 
Set minimum 12 seconds between requests in provider settings to avoid rate limits.", + }, + "gemini-2.5-flash-free": { + maxTokens: 64_000, + contextWindow: 250_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0, + outputPrice: 0, + cacheReadsPrice: 0, + cacheWritesPrice: 0, + maxThinkingTokens: 24_576, + supportsReasoningBudget: true, + description: + "Free tier version of Gemini 2.5 Flash with 250K context window and rate limits (10 RPM, 250K TPM, 250 RPD). Set minimum 6 seconds between requests in provider settings to avoid rate limits.", + }, + "gemini-2.5-flash-lite-free": { + maxTokens: 64_000, + contextWindow: 250_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0, + outputPrice: 0, + cacheReadsPrice: 0, + cacheWritesPrice: 0, + supportsReasoningBudget: true, + maxThinkingTokens: 24_576, + description: + "Free tier version of Gemini 2.5 Flash Lite with 250K context window and rate limits (15 RPM, 250K TPM, 1000 RPD). Set minimum 4 seconds between requests in provider settings to avoid rate limits.", + }, } as const satisfies Record<string, ModelInfo> diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index 5e547edbdc..3f1d3eb6a1 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -172,7 +172,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl // reasoning model and that reasoning is required to be enabled. // The actual model ID honored by Gemini's API does not have this // suffix. - return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params } + let apiModelId = id.endsWith(":thinking") ? id.replace(":thinking", "") : id + + // The `-free` suffix indicates free tier models with rate limits. + // Map them to their corresponding paid models for API calls. 
+ if (apiModelId.endsWith("-free")) { + apiModelId = apiModelId.replace("-free", "") as GeminiModelId + } + + return { id: apiModelId, info, ...params } } private extractCitationsOnly(groundingMetadata?: GroundingMetadata): string | null {