Skip to content

Commit 27c9c2d

Browse files
committed
feat: reduce Gemini 2.5 Pro minimum thinking budget to 128
- Add GEMINI_25_PRO_MIN_THINKING_TOKENS constant set to 128
- Update model-params.ts to detect Gemini 2.5 Pro models and use 128 as minimum
- Change default thinking tokens from 8192 to 128 for Gemini 2.5 Pro models
- Add tests to verify the new 128 minimum for Gemini 2.5 Pro
- Other models remain unaffected with 1024 minimum thinking budget
1 parent 8513263 commit 27c9c2d

File tree

3 files changed

+67
-4
lines changed

3 files changed

+67
-4
lines changed

src/api/transform/__tests__/model-params.spec.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,57 @@ describe("getModelParams", () => {
331331
})
332332
})
333333

334+
it("should clamp Gemini 2.5 Pro thinking budget to at least 128 tokens", () => {
335+
const model: ModelInfo = {
336+
...baseModel,
337+
requiredReasoningBudget: true,
338+
}
339+
340+
expect(
341+
getModelParams({
342+
modelId: "gemini-2.5-pro",
343+
format: "gemini" as const,
344+
settings: { modelMaxTokens: 2000, modelMaxThinkingTokens: 50 },
345+
model,
346+
}),
347+
).toEqual({
348+
format: "gemini",
349+
maxTokens: 2000,
350+
temperature: 1.0,
351+
reasoningEffort: undefined,
352+
reasoningBudget: 128, // Minimum is 128 for Gemini 2.5 Pro
353+
reasoning: {
354+
thinkingBudget: 128,
355+
includeThoughts: true,
356+
},
357+
})
358+
})
359+
360+
it("should use 128 as default thinking budget for Gemini 2.5 Pro", () => {
361+
const model: ModelInfo = {
362+
...baseModel,
363+
requiredReasoningBudget: true,
364+
}
365+
366+
expect(
367+
getModelParams({
368+
modelId: "google/gemini-2.5-pro",
369+
format: "openrouter" as const,
370+
settings: { modelMaxTokens: 4000 },
371+
model,
372+
}),
373+
).toEqual({
374+
format: "openrouter",
375+
maxTokens: 4000,
376+
temperature: 1.0,
377+
reasoningEffort: undefined,
378+
reasoningBudget: 128, // Default is 128 for Gemini 2.5 Pro
379+
reasoning: {
380+
max_tokens: 128,
381+
},
382+
})
383+
})
384+
334385
it("should clamp thinking budget to at most 80% of max tokens", () => {
335386
const model: ModelInfo = {
336387
...baseModel,

src/api/transform/model-params.ts

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } f
33
import {
44
DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
55
DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
6+
GEMINI_25_PRO_MIN_THINKING_TOKENS,
67
shouldUseReasoningBudget,
78
shouldUseReasoningEffort,
89
getModelMaxOutputTokens,
@@ -90,18 +91,28 @@ export function getModelParams({
9091
let reasoningEffort: ModelParams["reasoningEffort"] = undefined
9192

9293
if (shouldUseReasoningBudget({ model, settings })) {
94+
// Check if this is a Gemini 2.5 Pro model
95+
const isGemini25Pro = modelId.includes("gemini-2.5-pro") || modelId.includes("gemini-2.5-pro")
96+
9397
// If `customMaxThinkingTokens` is not specified use the default.
94-
reasoningBudget = customMaxThinkingTokens ?? DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
98+
// For Gemini 2.5 Pro, default to 128 instead of 8192
99+
const defaultThinkingTokens = isGemini25Pro
100+
? GEMINI_25_PRO_MIN_THINKING_TOKENS
101+
: DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
102+
reasoningBudget = customMaxThinkingTokens ?? defaultThinkingTokens
95103

96104
// Reasoning cannot exceed 80% of the `maxTokens` value.
97105
// maxTokens should always be defined for reasoning budget models, but add a guard just in case
98106
if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) {
99107
reasoningBudget = Math.floor(maxTokens * 0.8)
100108
}
101109

102-
// Reasoning cannot be less than 1024 tokens.
103-
if (reasoningBudget < 1024) {
104-
reasoningBudget = 1024
110+
// Reasoning cannot be less than minimum tokens.
111+
// For Gemini 2.5 Pro models, the minimum is 128 tokens
112+
// For other models, the minimum is 1024 tokens
113+
const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
114+
if (reasoningBudget < minThinkingTokens) {
115+
reasoningBudget = minThinkingTokens
105116
}
106117

107118
// Let's assume that "Hybrid" reasoning models require a temperature of

src/shared/api.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ export const shouldUseReasoningEffort = ({
5151

5252
export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
5353
export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
54+
export const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128
5455

5556
// Max Tokens
5657

0 commit comments

Comments (0)