Skip to content

Commit f18cf3d

Browse files
authored
feat: add Gemini 2.5 models (Pro, Flash and Flash Lite) (#4798)
1 parent 5c5ee8c commit f18cf3d

File tree

5 files changed

+136
-5
lines changed

5 files changed

+136
-5
lines changed

packages/types/src/providers/gemini.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,18 @@ export const geminiModels = {
4848
cacheReadsPrice: 0.0375,
4949
cacheWritesPrice: 1.0,
5050
},
51+
"gemini-2.5-flash": {
52+
maxTokens: 64_000,
53+
contextWindow: 1_048_576,
54+
supportsImages: true,
55+
supportsPromptCache: true,
56+
inputPrice: 0.15,
57+
outputPrice: 0.6,
58+
cacheReadsPrice: 0.0375,
59+
cacheWritesPrice: 1.0,
60+
maxThinkingTokens: 24_576,
61+
supportsReasoningBudget: true,
62+
},
5163
"gemini-2.5-pro-exp-03-25": {
5264
maxTokens: 65_535,
5365
contextWindow: 1_048_576,
@@ -130,6 +142,33 @@ export const geminiModels = {
130142
},
131143
],
132144
},
145+
"gemini-2.5-pro": {
146+
maxTokens: 64_000,
147+
contextWindow: 1_048_576,
148+
supportsImages: true,
149+
supportsPromptCache: true,
150+
inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
151+
outputPrice: 15,
152+
cacheReadsPrice: 0.625,
153+
cacheWritesPrice: 4.5,
154+
maxThinkingTokens: 32_768,
155+
supportsReasoningBudget: true,
156+
requiredReasoningBudget: true,
157+
tiers: [
158+
{
159+
contextWindow: 200_000,
160+
inputPrice: 1.25,
161+
outputPrice: 10,
162+
cacheReadsPrice: 0.31,
163+
},
164+
{
165+
contextWindow: Infinity,
166+
inputPrice: 2.5,
167+
outputPrice: 15,
168+
cacheReadsPrice: 0.625,
169+
},
170+
],
171+
},
133172
"gemini-2.0-flash-001": {
134173
maxTokens: 8192,
135174
contextWindow: 1_048_576,
@@ -244,4 +283,16 @@ export const geminiModels = {
244283
inputPrice: 0,
245284
outputPrice: 0,
246285
},
286+
"gemini-2.5-flash-lite-preview-06-17": {
287+
maxTokens: 64_000,
288+
contextWindow: 1_048_576,
289+
supportsImages: true,
290+
supportsPromptCache: true,
291+
inputPrice: 0.1,
292+
outputPrice: 0.4,
293+
cacheReadsPrice: 0.025,
294+
cacheWritesPrice: 1.0,
295+
supportsReasoningBudget: true,
296+
maxThinkingTokens: 24_576,
297+
},
247298
} as const satisfies Record<string, ModelInfo>

packages/types/src/providers/openrouter.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
4343
"google/gemini-2.5-flash-preview:thinking",
4444
"google/gemini-2.5-flash-preview-05-20",
4545
"google/gemini-2.5-flash-preview-05-20:thinking",
46+
"google/gemini-2.5-flash",
47+
"google/gemini-2.5-flash-lite-preview-06-17",
4648
"google/gemini-2.0-flash-001",
4749
"google/gemini-flash-1.5",
4850
"google/gemini-flash-1.5-8b",
@@ -68,6 +70,7 @@ export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
6870
// We should *not* be adding new models to this set.
6971
export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
7072
"anthropic/claude-3.7-sonnet:thinking",
73+
"google/gemini-2.5-pro",
7174
"google/gemini-2.5-flash-preview-05-20:thinking",
7275
])
7376

@@ -76,7 +79,10 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
7679
"anthropic/claude-opus-4",
7780
"anthropic/claude-sonnet-4",
7881
"google/gemini-2.5-pro-preview",
82+
"google/gemini-2.5-pro",
7983
"google/gemini-2.5-flash-preview-05-20",
84+
"google/gemini-2.5-flash",
85+
"google/gemini-2.5-flash-lite-preview-06-17",
8086
// Also include the models that require the reasoning budget to be enabled
8187
// even though `OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS` takes precedence.
8288
"anthropic/claude-3.7-sonnet:thinking",

packages/types/src/providers/vertex.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ export const vertexModels = {
2525
inputPrice: 0.15,
2626
outputPrice: 0.6,
2727
},
28+
"gemini-2.5-flash": {
29+
maxTokens: 64_000,
30+
contextWindow: 1_048_576,
31+
supportsImages: true,
32+
supportsPromptCache: true,
33+
inputPrice: 0.15,
34+
outputPrice: 0.6,
35+
maxThinkingTokens: 24_576,
36+
supportsReasoningBudget: true,
37+
},
2838
"gemini-2.5-flash-preview-04-17:thinking": {
2939
maxTokens: 65_535,
3040
contextWindow: 1_048_576,
@@ -70,6 +80,31 @@ export const vertexModels = {
7080
maxThinkingTokens: 32_768,
7181
supportsReasoningBudget: true,
7282
},
83+
"gemini-2.5-pro": {
84+
maxTokens: 64_000,
85+
contextWindow: 1_048_576,
86+
supportsImages: true,
87+
supportsPromptCache: true,
88+
inputPrice: 2.5,
89+
outputPrice: 15,
90+
maxThinkingTokens: 32_768,
91+
supportsReasoningBudget: true,
92+
requiredReasoningBudget: true,
93+
tiers: [
94+
{
95+
contextWindow: 200_000,
96+
inputPrice: 1.25,
97+
outputPrice: 10,
98+
cacheReadsPrice: 0.31,
99+
},
100+
{
101+
contextWindow: Infinity,
102+
inputPrice: 2.5,
103+
outputPrice: 15,
104+
cacheReadsPrice: 0.625,
105+
},
106+
],
107+
},
73108
"gemini-2.5-pro-exp-03-25": {
74109
maxTokens: 65_535,
75110
contextWindow: 1_048_576,
@@ -224,6 +259,18 @@ export const vertexModels = {
224259
cacheWritesPrice: 0.3,
225260
cacheReadsPrice: 0.03,
226261
},
262+
"gemini-2.5-flash-lite-preview-06-17": {
263+
maxTokens: 64_000,
264+
contextWindow: 1_048_576,
265+
supportsImages: true,
266+
supportsPromptCache: true,
267+
inputPrice: 0.1,
268+
outputPrice: 0.4,
269+
cacheReadsPrice: 0.025,
270+
cacheWritesPrice: 1.0,
271+
maxThinkingTokens: 24_576,
272+
supportsReasoningBudget: true,
273+
},
227274
} as const satisfies Record<string, ModelInfo>
228275

229276
export const VERTEX_REGIONS = [

src/api/providers/fetchers/__tests__/openrouter.spec.ts

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,23 @@ describe("OpenRouter API", () => {
2727
.filter(([_, model]) => model.supportsPromptCache)
2828
.map(([id, _]) => id)
2929

30-
const ourCachingModels = Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS)
30+
// Define models that are intentionally excluded
31+
const excludedModels = new Set([
32+
"google/gemini-2.5-pro-preview", // Excluded due to lag issue (#4487)
33+
"google/gemini-2.5-flash", // OpenRouter doesn't report this as supporting prompt caching
34+
"google/gemini-2.5-flash-lite-preview-06-17", // OpenRouter doesn't report this as supporting prompt caching
35+
])
36+
37+
const ourCachingModels = Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS).filter(
38+
(id) => !excludedModels.has(id),
39+
)
3140

3241
// Verify all our caching models are actually supported by OpenRouter
3342
for (const modelId of ourCachingModels) {
3443
expect(openRouterSupportedCaching).toContain(modelId)
3544
}
3645

3746
// Verify we have all supported models except intentionally excluded ones
38-
const excludedModels = new Set(["google/gemini-2.5-pro-preview"]) // Excluded due to lag issue (#4487)
3947
const expectedCachingModels = openRouterSupportedCaching.filter((id) => !excludedModels.has(id)).sort()
4048

4149
expect(ourCachingModels.sort()).toEqual(expectedCachingModels)
@@ -109,20 +117,36 @@ describe("OpenRouter API", () => {
109117
"tngtech/deepseek-r1t-chimera:free",
110118
"x-ai/grok-3-mini-beta",
111119
])
120+
// OpenRouter is taking a while to update their models, so we exclude some known models
121+
const excludedReasoningBudgetModels = new Set([
122+
"google/gemini-2.5-flash",
123+
"google/gemini-2.5-flash-lite-preview-06-17",
124+
"google/gemini-2.5-pro",
125+
])
126+
127+
const expectedReasoningBudgetModels = Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS)
128+
.filter((id) => !excludedReasoningBudgetModels.has(id))
129+
.sort()
112130

113131
expect(
114132
Object.entries(models)
115133
.filter(([_, model]) => model.supportsReasoningBudget)
116134
.map(([id, _]) => id)
117135
.sort(),
118-
).toEqual(Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS).sort())
136+
).toEqual(expectedReasoningBudgetModels)
137+
138+
const excludedRequiredReasoningBudgetModels = new Set(["google/gemini-2.5-pro"])
139+
140+
const expectedRequiredReasoningBudgetModels = Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS)
141+
.filter((id) => !excludedRequiredReasoningBudgetModels.has(id))
142+
.sort()
119143

120144
expect(
121145
Object.entries(models)
122146
.filter(([_, model]) => model.requiredReasoningBudget)
123147
.map(([id, _]) => id)
124148
.sort(),
125-
).toEqual(Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS).sort())
149+
).toEqual(expectedRequiredReasoningBudgetModels)
126150

127151
expect(models["anthropic/claude-3.7-sonnet"]).toEqual({
128152
maxTokens: 8192,

src/api/providers/openrouter.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
8484
// other providers (including Gemini), so we need to explicitly disable
8585
// i We should generalize this using the logic in `getModelParams`, but
8686
// this is easier for now.
87-
if (modelId === "google/gemini-2.5-pro-preview" && typeof reasoning === "undefined") {
87+
if (
88+
(modelId === "google/gemini-2.5-pro-preview" || modelId === "google/gemini-2.5-pro") &&
89+
typeof reasoning === "undefined"
90+
) {
8891
reasoning = { exclude: true }
8992
}
9093

0 commit comments

Comments (0)