Skip to content

Commit 27c9c2d

Browse files
committed
feat: reduce Gemini 2.5 Pro minimum thinking budget to 128
- Add GEMINI_25_PRO_MIN_THINKING_TOKENS constant set to 128
- Update model-params.ts to detect Gemini 2.5 Pro models and use 128 as minimum
- Change default thinking tokens from 8192 to 128 for Gemini 2.5 Pro models
- Add tests to verify the new 128 minimum for Gemini 2.5 Pro
- Other models remain unaffected with 1024 minimum thinking budget
1 parent 8513263 commit 27c9c2d

File tree

3 files changed

+67
-4
lines changed

3 files changed

+67
-4
lines changed

src/api/transform/__tests__/model-params.spec.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,57 @@ describe("getModelParams", () => {
331331
})
332332
})
333333

334+
it("should clamp Gemini 2.5 Pro thinking budget to at least 128 tokens", () => {
335+
const model: ModelInfo = {
336+
...baseModel,
337+
requiredReasoningBudget: true,
338+
}
339+
340+
expect(
341+
getModelParams({
342+
modelId: "gemini-2.5-pro",
343+
format: "gemini" as const,
344+
settings: { modelMaxTokens: 2000, modelMaxThinkingTokens: 50 },
345+
model,
346+
}),
347+
).toEqual({
348+
format: "gemini",
349+
maxTokens: 2000,
350+
temperature: 1.0,
351+
reasoningEffort: undefined,
352+
reasoningBudget: 128, // Minimum is 128 for Gemini 2.5 Pro
353+
reasoning: {
354+
thinkingBudget: 128,
355+
includeThoughts: true,
356+
},
357+
})
358+
})
359+
360+
it("should use 128 as default thinking budget for Gemini 2.5 Pro", () => {
361+
const model: ModelInfo = {
362+
...baseModel,
363+
requiredReasoningBudget: true,
364+
}
365+
366+
expect(
367+
getModelParams({
368+
modelId: "google/gemini-2.5-pro",
369+
format: "openrouter" as const,
370+
settings: { modelMaxTokens: 4000 },
371+
model,
372+
}),
373+
).toEqual({
374+
format: "openrouter",
375+
maxTokens: 4000,
376+
temperature: 1.0,
377+
reasoningEffort: undefined,
378+
reasoningBudget: 128, // Default is 128 for Gemini 2.5 Pro
379+
reasoning: {
380+
max_tokens: 128,
381+
},
382+
})
383+
})
384+
334385
it("should clamp thinking budget to at most 80% of max tokens", () => {
335386
const model: ModelInfo = {
336387
...baseModel,

src/api/transform/model-params.ts

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } f
33
import {
44
DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
55
DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
6+
GEMINI_25_PRO_MIN_THINKING_TOKENS,
67
shouldUseReasoningBudget,
78
shouldUseReasoningEffort,
89
getModelMaxOutputTokens,
@@ -90,18 +91,28 @@ export function getModelParams({
9091
let reasoningEffort: ModelParams["reasoningEffort"] = undefined
9192

9293
if (shouldUseReasoningBudget({ model, settings })) {
94+
// Check if this is a Gemini 2.5 Pro model
95+
const isGemini25Pro = modelId.includes("gemini-2.5-pro") || modelId.includes("gemini-2.5-pro")
96+
9397
// If `customMaxThinkingTokens` is not specified use the default.
94-
reasoningBudget = customMaxThinkingTokens ?? DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
98+
// For Gemini 2.5 Pro, default to 128 instead of 8192
99+
const defaultThinkingTokens = isGemini25Pro
100+
? GEMINI_25_PRO_MIN_THINKING_TOKENS
101+
: DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
102+
reasoningBudget = customMaxThinkingTokens ?? defaultThinkingTokens
95103

96104
// Reasoning cannot exceed 80% of the `maxTokens` value.
97105
// maxTokens should always be defined for reasoning budget models, but add a guard just in case
98106
if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) {
99107
reasoningBudget = Math.floor(maxTokens * 0.8)
100108
}
101109

102-
// Reasoning cannot be less than 1024 tokens.
103-
if (reasoningBudget < 1024) {
104-
reasoningBudget = 1024
110+
// Reasoning cannot be less than minimum tokens.
111+
// For Gemini 2.5 Pro models, the minimum is 128 tokens
112+
// For other models, the minimum is 1024 tokens
113+
const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
114+
if (reasoningBudget < minThinkingTokens) {
115+
reasoningBudget = minThinkingTokens
105116
}
106117

107118
// Let's assume that "Hybrid" reasoning models require a temperature of

src/shared/api.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ export const shouldUseReasoningEffort = ({
5151

5252
export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
5353
export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
54+
export const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128
5455

5556
// Max Tokens
5657

0 commit comments

Comments (0)