
Commit 263e317

feat: reduce Gemini 2.5 Pro minimum thinking budget to 128 (#6588)
Co-authored-by: Roo Code <[email protected]>
1 parent 1237eb8 commit 263e317

File tree

5 files changed: +150 −9 lines changed


src/api/transform/__tests__/model-params.spec.ts

Lines changed: 51 additions & 0 deletions
@@ -331,6 +331,57 @@ describe("getModelParams", () => {
 		})
 	})
 
+	it("should clamp Gemini 2.5 Pro thinking budget to at least 128 tokens", () => {
+		const model: ModelInfo = {
+			...baseModel,
+			requiredReasoningBudget: true,
+		}
+
+		expect(
+			getModelParams({
+				modelId: "gemini-2.5-pro",
+				format: "gemini" as const,
+				settings: { modelMaxTokens: 2000, modelMaxThinkingTokens: 50 },
+				model,
+			}),
+		).toEqual({
+			format: "gemini",
+			maxTokens: 2000,
+			temperature: 1.0,
+			reasoningEffort: undefined,
+			reasoningBudget: 128, // Minimum is 128 for Gemini 2.5 Pro
+			reasoning: {
+				thinkingBudget: 128,
+				includeThoughts: true,
+			},
+		})
+	})
+
+	it("should use 128 as default thinking budget for Gemini 2.5 Pro", () => {
+		const model: ModelInfo = {
+			...baseModel,
+			requiredReasoningBudget: true,
+		}
+
+		expect(
+			getModelParams({
+				modelId: "google/gemini-2.5-pro",
+				format: "openrouter" as const,
+				settings: { modelMaxTokens: 4000 },
+				model,
+			}),
+		).toEqual({
+			format: "openrouter",
+			maxTokens: 4000,
+			temperature: 1.0,
+			reasoningEffort: undefined,
+			reasoningBudget: 128, // Default is 128 for Gemini 2.5 Pro
+			reasoning: {
+				max_tokens: 128,
+			},
+		})
+	})
+
 	it("should clamp thinking budget to at most 80% of max tokens", () => {
 		const model: ModelInfo = {
 			...baseModel,

src/api/transform/model-params.ts

Lines changed: 15 additions & 4 deletions
@@ -3,6 +3,7 @@ import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } f
 import {
 	DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
 	DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
+	GEMINI_25_PRO_MIN_THINKING_TOKENS,
 	shouldUseReasoningBudget,
 	shouldUseReasoningEffort,
 	getModelMaxOutputTokens,
@@ -90,18 +91,28 @@ export function getModelParams({
 	let reasoningEffort: ModelParams["reasoningEffort"] = undefined
 
 	if (shouldUseReasoningBudget({ model, settings })) {
+		// Check if this is a Gemini 2.5 Pro model
+		const isGemini25Pro = modelId.includes("gemini-2.5-pro")
+
 		// If `customMaxThinkingTokens` is not specified use the default.
-		reasoningBudget = customMaxThinkingTokens ?? DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
+		// For Gemini 2.5 Pro, default to 128 instead of 8192
+		const defaultThinkingTokens = isGemini25Pro
+			? GEMINI_25_PRO_MIN_THINKING_TOKENS
+			: DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
+		reasoningBudget = customMaxThinkingTokens ?? defaultThinkingTokens
 
 		// Reasoning cannot exceed 80% of the `maxTokens` value.
 		// maxTokens should always be defined for reasoning budget models, but add a guard just in case
 		if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) {
 			reasoningBudget = Math.floor(maxTokens * 0.8)
 		}
 
-		// Reasoning cannot be less than 1024 tokens.
-		if (reasoningBudget < 1024) {
-			reasoningBudget = 1024
+		// Reasoning cannot be less than minimum tokens.
+		// For Gemini 2.5 Pro models, the minimum is 128 tokens
+		// For other models, the minimum is 1024 tokens
+		const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
+		if (reasoningBudget < minThinkingTokens) {
+			reasoningBudget = minThinkingTokens
 		}
 
 		// Let's assume that "Hybrid" reasoning models require a temperature of
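
Taken together, the change above first picks a default thinking budget (128 for Gemini 2.5 Pro, 8 192 otherwise), caps it at 80% of maxTokens, then clamps it up to the per-model minimum. A minimal standalone sketch of that ordering follows; the resolveReasoningBudget helper and its flattened signature are illustrative only, not the repository's getModelParams, with the constant values copied from this commit.

// Hypothetical standalone sketch of the clamping order in this commit.
const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128
const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192

function resolveReasoningBudget(modelId: string, maxTokens: number, customMaxThinkingTokens?: number): number {
	const isGemini25Pro = modelId.includes("gemini-2.5-pro")

	// 1. Default: 128 for Gemini 2.5 Pro, 8_192 for other hybrid reasoning models.
	let budget =
		customMaxThinkingTokens ??
		(isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS)

	// 2. Cap at 80% of the model's max output tokens.
	budget = Math.min(budget, Math.floor(maxTokens * 0.8))

	// 3. Clamp up to the per-model minimum (128 vs. 1024).
	return Math.max(budget, isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024)
}

// Mirrors the first test above: a 50-token request on gemini-2.5-pro clamps up to 128.
console.log(resolveReasoningBudget("gemini-2.5-pro", 2000, 50)) // 128
console.log(resolveReasoningBudget("claude-sonnet-4", 16_384, 50)) // 1024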

src/shared/api.ts

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ export const shouldUseReasoningEffort = ({
 
 export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
 export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
+export const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128
 
 // Max Tokens
 

webview-ui/src/components/settings/ThinkingBudget.tsx

Lines changed: 13 additions & 3 deletions
@@ -3,10 +3,15 @@ import { Checkbox } from "vscrui"
 
 import { type ProviderSettings, type ModelInfo, type ReasoningEffort, reasoningEfforts } from "@roo-code/types"
 
-import { DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS, DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS } from "@roo/api"
+import {
+	DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
+	DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
+	GEMINI_25_PRO_MIN_THINKING_TOKENS,
+} from "@roo/api"
 
 import { useAppTranslation } from "@src/i18n/TranslationContext"
 import { Slider, Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui"
+import { useSelectedModel } from "@src/components/ui/hooks/useSelectedModel"
 
 interface ThinkingBudgetProps {
 	apiConfiguration: ProviderSettings
@@ -16,6 +21,11 @@ interface ThinkingBudgetProps {
 
 export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
 	const { t } = useAppTranslation()
+	const { id: selectedModelId } = useSelectedModel(apiConfiguration)
+
+	// Check if this is a Gemini 2.5 Pro model
+	const isGemini25Pro = selectedModelId && selectedModelId.includes("gemini-2.5-pro")
+	const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
 
 	const isReasoningBudgetSupported = !!modelInfo && modelInfo.supportsReasoningBudget
 	const isReasoningBudgetRequired = !!modelInfo && modelInfo.requiredReasoningBudget
@@ -81,9 +91,9 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 					<div className="font-medium">{t("settings:thinkingBudget.maxThinkingTokens")}</div>
 					<div className="flex items-center gap-1" data-testid="reasoning-budget">
 						<Slider
-							min={1024}
+							min={minThinkingTokens}
 							max={modelMaxThinkingTokens}
-							step={1024}
+							step={minThinkingTokens === 128 ? 128 : 1024}
 							value={[customMaxThinkingTokens]}
 							onValueChange={([value]) => setApiConfigurationField("modelMaxThinkingTokens", value)}
 						/>
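
In the settings UI, the slider's minimum and step therefore follow the selected model. A small sketch of the derived values under the same rule; the sliderBounds helper below is hypothetical, not part of the component.

// Hypothetical helper mirroring the Slider props above: 128/128 for Gemini 2.5 Pro, otherwise 1024/1024.
function sliderBounds(selectedModelId: string | undefined): { min: number; step: number } {
	const isGemini25Pro = !!selectedModelId && selectedModelId.includes("gemini-2.5-pro")
	const min = isGemini25Pro ? 128 : 1024
	return { min, step: min === 128 ? 128 : 1024 }
}

console.log(sliderBounds("gemini-2.5-pro-002")) // { min: 128, step: 128 }
console.log(sliderBounds("claude-3-5-sonnet-20241022")) // { min: 1024, step: 1024 }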

webview-ui/src/components/settings/__tests__/ThinkingBudget.spec.tsx

Lines changed: 70 additions & 2 deletions
@@ -7,18 +7,38 @@ import type { ModelInfo } from "@roo-code/types"
 import { ThinkingBudget } from "../ThinkingBudget"
 
 vi.mock("@/components/ui", () => ({
-	Slider: ({ value, onValueChange, min, max }: any) => (
+	Slider: ({ value, onValueChange, min, max, step }: any) => (
 		<input
 			type="range"
 			data-testid="slider"
 			min={min}
 			max={max}
+			step={step}
 			value={value[0]}
 			onChange={(e) => onValueChange([parseInt(e.target.value)])}
 		/>
 	),
 }))
 
+vi.mock("@/components/ui/hooks/useSelectedModel", () => ({
+	useSelectedModel: (apiConfiguration: any) => {
+		// Return the model ID based on apiConfiguration for testing
+		// For Gemini tests, check if apiProvider is gemini and use apiModelId
+		if (apiConfiguration?.apiProvider === "gemini") {
+			return {
+				id: apiConfiguration?.apiModelId || "gemini-2.0-flash-exp",
+				provider: "gemini",
+				info: undefined,
+			}
+		}
+		return {
+			id: apiConfiguration?.apiModelId || "claude-3-5-sonnet-20241022",
+			provider: apiConfiguration?.apiProvider || "anthropic",
+			info: undefined,
+		}
+	},
+}))
+
 describe("ThinkingBudget", () => {
 	const mockModelInfo: ModelInfo = {
 		supportsReasoningBudget: true,
@@ -103,13 +123,61 @@ describe("ThinkingBudget", () => {
 		expect(sliders[1]).toHaveValue("8000") // 80% of 10000
 	})
 
-	it("should use min thinking tokens of 1024", () => {
+	it("should use min thinking tokens of 1024 for non-Gemini models", () => {
 		render(<ThinkingBudget {...defaultProps} apiConfiguration={{ modelMaxTokens: 1000 }} />)
 
 		const sliders = screen.getAllByTestId("slider")
 		expect(sliders[1].getAttribute("min")).toBe("1024")
 	})
 
+	it("should use min thinking tokens of 128 for Gemini 2.5 Pro models", () => {
+		render(
+			<ThinkingBudget
+				{...defaultProps}
+				apiConfiguration={{
+					modelMaxTokens: 10000,
+					apiProvider: "gemini",
+					apiModelId: "gemini-2.5-pro-002",
+				}}
+			/>,
+		)
+
+		const sliders = screen.getAllByTestId("slider")
+		expect(sliders[1].getAttribute("min")).toBe("128")
+	})
+
+	it("should use step of 128 for Gemini 2.5 Pro models", () => {
+		render(
+			<ThinkingBudget
+				{...defaultProps}
+				apiConfiguration={{
+					modelMaxTokens: 10000,
+					apiProvider: "gemini",
+					apiModelId: "gemini-2.5-pro-002",
+				}}
+			/>,
+		)
+
+		const sliders = screen.getAllByTestId("slider")
+		expect(sliders[1].getAttribute("step")).toBe("128")
+	})
+
+	it("should use step of 1024 for non-Gemini models", () => {
+		render(
+			<ThinkingBudget
+				{...defaultProps}
+				apiConfiguration={{
+					modelMaxTokens: 10000,
+					apiProvider: "anthropic",
+					apiModelId: "claude-3-5-sonnet-20241022",
+				}}
+			/>,
+		)
+
+		const sliders = screen.getAllByTestId("slider")
+		expect(sliders[1].getAttribute("step")).toBe("1024")
+	})
+
 	it("should update max tokens when slider changes", () => {
 		const setApiConfigurationField = vi.fn()
 