feat: add Gemini free tier models with -free aliases

shariqriazz · shariqriazz · commit 64e6086f611d · 2025-08-24T02:32:04.000+05:00
Add three new free-tier Gemini models with 250K context windows:
- gemini-2.5-pro-free (5 RPM, 250K TPM, 100 RPD)
- gemini-2.5-flash-free (10 RPM, 250K TPM, 250 RPD)
- gemini-2.5-flash-lite-free (15 RPM, 250K TPM, 1000 RPD)

The provider maps -free aliases to base model IDs for API calls while
preserving the 250K context limit and $0 pricing in the UI.

Descriptions include rate-limiting guidance so users can set an appropriate
minimum delay between requests in provider settings: 12s, 6s, and 4s
respectively (60 seconds divided by each model's RPM limit).
diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts
@@ -295,4 +295,47 @@ export const geminiModels = {
 		supportsReasoningBudget: true,
 		maxThinkingTokens: 24_576,
 	},
+	"gemini-2.5-pro-free": {
+		maxTokens: 64_000,
+		contextWindow: 250_000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheReadsPrice: 0,
+		cacheWritesPrice: 0,
+		maxThinkingTokens: 32_768,
+		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
+		description:
+			"Free tier version of Gemini 2.5 Pro with 250K context window and rate limits (5 RPM, 250K TPM, 100 RPD). Set minimum 12 seconds between requests in provider settings to avoid rate limits.",
+	},
+	"gemini-2.5-flash-free": {
+		maxTokens: 64_000,
+		contextWindow: 250_000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheReadsPrice: 0,
+		cacheWritesPrice: 0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
+		description:
+			"Free tier version of Gemini 2.5 Flash with 250K context window and rate limits (10 RPM, 250K TPM, 250 RPD). Set minimum 6 seconds between requests in provider settings to avoid rate limits.",
+	},
+	"gemini-2.5-flash-lite-free": {
+		maxTokens: 64_000,
+		contextWindow: 250_000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheReadsPrice: 0,
+		cacheWritesPrice: 0,
+		supportsReasoningBudget: true,
+		maxThinkingTokens: 24_576,
+		description:
+			"Free tier version of Gemini 2.5 Flash Lite with 250K context window and rate limits (15 RPM, 250K TPM, 1000 RPD). Set minimum 4 seconds between requests in provider settings to avoid rate limits.",
+	},
 } as const satisfies Record<string, ModelInfo>
diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts
@@ -172,7 +172,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		// reasoning model and that reasoning is required to be enabled.
 		// The actual model ID honored by Gemini's API does not have this
 		// suffix.
-		return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
+		let apiModelId = id.endsWith(":thinking") ? id.replace(":thinking", "") : id
+
+		// The `-free` suffix marks a free-tier alias with its own rate limits.
+		// Strip it so API calls use the corresponding base model ID.
+		if (apiModelId.endsWith("-free")) {
+			apiModelId = apiModelId.replace("-free", "") as GeminiModelId
+		}
+
+		return { id: apiModelId, info, ...params }
 	}
 
 	private extractCitationsOnly(groundingMetadata?: GroundingMetadata): string | null {