Skip to content

Commit f18cf3d

Browse files
authored
feat: add Gemini 2.5 models (Pro, Flash and Flash Lite) (#4798)
1 parent 5c5ee8c commit f18cf3d

File tree

5 files changed

+136
-5
lines changed

5 files changed

+136
-5
lines changed

packages/types/src/providers/gemini.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,18 @@ export const geminiModels = {
4848
cacheReadsPrice: 0.0375,
4949
cacheWritesPrice: 1.0,
5050
},
51+
"gemini-2.5-flash": {
52+
maxTokens: 64_000,
53+
contextWindow: 1_048_576,
54+
supportsImages: true,
55+
supportsPromptCache: true,
56+
inputPrice: 0.15,
57+
outputPrice: 0.6,
58+
cacheReadsPrice: 0.0375,
59+
cacheWritesPrice: 1.0,
60+
maxThinkingTokens: 24_576,
61+
supportsReasoningBudget: true,
62+
},
5163
"gemini-2.5-pro-exp-03-25": {
5264
maxTokens: 65_535,
5365
contextWindow: 1_048_576,
@@ -130,6 +142,33 @@ export const geminiModels = {
130142
},
131143
],
132144
},
145+
"gemini-2.5-pro": {
146+
maxTokens: 64_000,
147+
contextWindow: 1_048_576,
148+
supportsImages: true,
149+
supportsPromptCache: true,
150+
inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
151+
outputPrice: 15,
152+
cacheReadsPrice: 0.625,
153+
cacheWritesPrice: 4.5,
154+
maxThinkingTokens: 32_768,
155+
supportsReasoningBudget: true,
156+
requiredReasoningBudget: true,
157+
tiers: [
158+
{
159+
contextWindow: 200_000,
160+
inputPrice: 1.25,
161+
outputPrice: 10,
162+
cacheReadsPrice: 0.31,
163+
},
164+
{
165+
contextWindow: Infinity,
166+
inputPrice: 2.5,
167+
outputPrice: 15,
168+
cacheReadsPrice: 0.625,
169+
},
170+
],
171+
},
133172
"gemini-2.0-flash-001": {
134173
maxTokens: 8192,
135174
contextWindow: 1_048_576,
@@ -244,4 +283,16 @@ export const geminiModels = {
244283
inputPrice: 0,
245284
outputPrice: 0,
246285
},
286+
"gemini-2.5-flash-lite-preview-06-17": {
287+
maxTokens: 64_000,
288+
contextWindow: 1_048_576,
289+
supportsImages: true,
290+
supportsPromptCache: true,
291+
inputPrice: 0.1,
292+
outputPrice: 0.4,
293+
cacheReadsPrice: 0.025,
294+
cacheWritesPrice: 1.0,
295+
supportsReasoningBudget: true,
296+
maxThinkingTokens: 24_576,
297+
},
247298
} as const satisfies Record<string, ModelInfo>

packages/types/src/providers/openrouter.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
4343
"google/gemini-2.5-flash-preview:thinking",
4444
"google/gemini-2.5-flash-preview-05-20",
4545
"google/gemini-2.5-flash-preview-05-20:thinking",
46+
"google/gemini-2.5-flash",
47+
"google/gemini-2.5-flash-lite-preview-06-17",
4648
"google/gemini-2.0-flash-001",
4749
"google/gemini-flash-1.5",
4850
"google/gemini-flash-1.5-8b",
@@ -68,6 +70,7 @@ export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
6870
// We should *not* be adding new models to this set.
6971
export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
7072
"anthropic/claude-3.7-sonnet:thinking",
73+
"google/gemini-2.5-pro",
7174
"google/gemini-2.5-flash-preview-05-20:thinking",
7275
])
7376

@@ -76,7 +79,10 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
7679
"anthropic/claude-opus-4",
7780
"anthropic/claude-sonnet-4",
7881
"google/gemini-2.5-pro-preview",
82+
"google/gemini-2.5-pro",
7983
"google/gemini-2.5-flash-preview-05-20",
84+
"google/gemini-2.5-flash",
85+
"google/gemini-2.5-flash-lite-preview-06-17",
8086
// Also include the models that require the reasoning budget to be enabled
8187
// even though `OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS` takes precedence.
8288
"anthropic/claude-3.7-sonnet:thinking",

packages/types/src/providers/vertex.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ export const vertexModels = {
2525
inputPrice: 0.15,
2626
outputPrice: 0.6,
2727
},
28+
"gemini-2.5-flash": {
29+
maxTokens: 64_000,
30+
contextWindow: 1_048_576,
31+
supportsImages: true,
32+
supportsPromptCache: true,
33+
inputPrice: 0.15,
34+
outputPrice: 0.6,
35+
maxThinkingTokens: 24_576,
36+
supportsReasoningBudget: true,
37+
},
2838
"gemini-2.5-flash-preview-04-17:thinking": {
2939
maxTokens: 65_535,
3040
contextWindow: 1_048_576,
@@ -70,6 +80,31 @@ export const vertexModels = {
7080
maxThinkingTokens: 32_768,
7181
supportsReasoningBudget: true,
7282
},
83+
"gemini-2.5-pro": {
84+
maxTokens: 64_000,
85+
contextWindow: 1_048_576,
86+
supportsImages: true,
87+
supportsPromptCache: true,
88+
inputPrice: 2.5,
89+
outputPrice: 15,
90+
maxThinkingTokens: 32_768,
91+
supportsReasoningBudget: true,
92+
requiredReasoningBudget: true,
93+
tiers: [
94+
{
95+
contextWindow: 200_000,
96+
inputPrice: 1.25,
97+
outputPrice: 10,
98+
cacheReadsPrice: 0.31,
99+
},
100+
{
101+
contextWindow: Infinity,
102+
inputPrice: 2.5,
103+
outputPrice: 15,
104+
cacheReadsPrice: 0.625,
105+
},
106+
],
107+
},
73108
"gemini-2.5-pro-exp-03-25": {
74109
maxTokens: 65_535,
75110
contextWindow: 1_048_576,
@@ -224,6 +259,18 @@ export const vertexModels = {
224259
cacheWritesPrice: 0.3,
225260
cacheReadsPrice: 0.03,
226261
},
262+
"gemini-2.5-flash-lite-preview-06-17": {
263+
maxTokens: 64_000,
264+
contextWindow: 1_048_576,
265+
supportsImages: true,
266+
supportsPromptCache: true,
267+
inputPrice: 0.1,
268+
outputPrice: 0.4,
269+
cacheReadsPrice: 0.025,
270+
cacheWritesPrice: 1.0,
271+
maxThinkingTokens: 24_576,
272+
supportsReasoningBudget: true,
273+
},
227274
} as const satisfies Record<string, ModelInfo>
228275

229276
export const VERTEX_REGIONS = [

src/api/providers/fetchers/__tests__/openrouter.spec.ts

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,23 @@ describe("OpenRouter API", () => {
2727
.filter(([_, model]) => model.supportsPromptCache)
2828
.map(([id, _]) => id)
2929

30-
const ourCachingModels = Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS)
30+
// Define models that are intentionally excluded
31+
const excludedModels = new Set([
32+
"google/gemini-2.5-pro-preview", // Excluded due to lag issue (#4487)
33+
"google/gemini-2.5-flash", // OpenRouter doesn't report this as supporting prompt caching
34+
"google/gemini-2.5-flash-lite-preview-06-17", // OpenRouter doesn't report this as supporting prompt caching
35+
])
36+
37+
const ourCachingModels = Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS).filter(
38+
(id) => !excludedModels.has(id),
39+
)
3140

3241
// Verify all our caching models are actually supported by OpenRouter
3342
for (const modelId of ourCachingModels) {
3443
expect(openRouterSupportedCaching).toContain(modelId)
3544
}
3645

3746
// Verify we have all supported models except intentionally excluded ones
38-
const excludedModels = new Set(["google/gemini-2.5-pro-preview"]) // Excluded due to lag issue (#4487)
3947
const expectedCachingModels = openRouterSupportedCaching.filter((id) => !excludedModels.has(id)).sort()
4048

4149
expect(ourCachingModels.sort()).toEqual(expectedCachingModels)
@@ -109,20 +117,36 @@ describe("OpenRouter API", () => {
109117
"tngtech/deepseek-r1t-chimera:free",
110118
"x-ai/grok-3-mini-beta",
111119
])
120+
// OpenRouter is taking a while to update their models, so we exclude some known models
121+
const excludedReasoningBudgetModels = new Set([
122+
"google/gemini-2.5-flash",
123+
"google/gemini-2.5-flash-lite-preview-06-17",
124+
"google/gemini-2.5-pro",
125+
])
126+
127+
const expectedReasoningBudgetModels = Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS)
128+
.filter((id) => !excludedReasoningBudgetModels.has(id))
129+
.sort()
112130

113131
expect(
114132
Object.entries(models)
115133
.filter(([_, model]) => model.supportsReasoningBudget)
116134
.map(([id, _]) => id)
117135
.sort(),
118-
).toEqual(Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS).sort())
136+
).toEqual(expectedReasoningBudgetModels)
137+
138+
const excludedRequiredReasoningBudgetModels = new Set(["google/gemini-2.5-pro"])
139+
140+
const expectedRequiredReasoningBudgetModels = Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS)
141+
.filter((id) => !excludedRequiredReasoningBudgetModels.has(id))
142+
.sort()
119143

120144
expect(
121145
Object.entries(models)
122146
.filter(([_, model]) => model.requiredReasoningBudget)
123147
.map(([id, _]) => id)
124148
.sort(),
125-
).toEqual(Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS).sort())
149+
).toEqual(expectedRequiredReasoningBudgetModels)
126150

127151
expect(models["anthropic/claude-3.7-sonnet"]).toEqual({
128152
maxTokens: 8192,

src/api/providers/openrouter.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
8484
// other providers (including Gemini), so we need to explicitly disable
8585
// i We should generalize this using the logic in `getModelParams`, but
8686
// this is easier for now.
87-
if (modelId === "google/gemini-2.5-pro-preview" && typeof reasoning === "undefined") {
87+
if (
88+
(modelId === "google/gemini-2.5-pro-preview" || modelId === "google/gemini-2.5-pro") &&
89+
typeof reasoning === "undefined"
90+
) {
8891
reasoning = { exclude: true }
8992
}
9093

0 commit comments

Comments (0)