
Commit 263e317

feat: reduce Gemini 2.5 Pro minimum thinking budget to 128 (#6588)
Co-authored-by: Roo Code <[email protected]>
1 parent 1237eb8 commit 263e317

File tree

5 files changed: +150 −9 lines changed


src/api/transform/__tests__/model-params.spec.ts

Lines changed: 51 additions & 0 deletions
@@ -331,6 +331,57 @@ describe("getModelParams", () => {
 		})
 	})
 
+	it("should clamp Gemini 2.5 Pro thinking budget to at least 128 tokens", () => {
+		const model: ModelInfo = {
+			...baseModel,
+			requiredReasoningBudget: true,
+		}
+
+		expect(
+			getModelParams({
+				modelId: "gemini-2.5-pro",
+				format: "gemini" as const,
+				settings: { modelMaxTokens: 2000, modelMaxThinkingTokens: 50 },
+				model,
+			}),
+		).toEqual({
+			format: "gemini",
+			maxTokens: 2000,
+			temperature: 1.0,
+			reasoningEffort: undefined,
+			reasoningBudget: 128, // Minimum is 128 for Gemini 2.5 Pro
+			reasoning: {
+				thinkingBudget: 128,
+				includeThoughts: true,
+			},
+		})
+	})
+
+	it("should use 128 as default thinking budget for Gemini 2.5 Pro", () => {
+		const model: ModelInfo = {
+			...baseModel,
+			requiredReasoningBudget: true,
+		}
+
+		expect(
+			getModelParams({
+				modelId: "google/gemini-2.5-pro",
+				format: "openrouter" as const,
+				settings: { modelMaxTokens: 4000 },
+				model,
+			}),
+		).toEqual({
+			format: "openrouter",
+			maxTokens: 4000,
+			temperature: 1.0,
+			reasoningEffort: undefined,
+			reasoningBudget: 128, // Default is 128 for Gemini 2.5 Pro
+			reasoning: {
+				max_tokens: 128,
+			},
+		})
+	})
+
 	it("should clamp thinking budget to at most 80% of max tokens", () => {
 		const model: ModelInfo = {
 			...baseModel,

src/api/transform/model-params.ts

Lines changed: 15 additions & 4 deletions
@@ -3,6 +3,7 @@ import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } f
 import {
 	DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
 	DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
+	GEMINI_25_PRO_MIN_THINKING_TOKENS,
 	shouldUseReasoningBudget,
 	shouldUseReasoningEffort,
 	getModelMaxOutputTokens,
@@ -90,18 +91,28 @@ export function getModelParams({
 	let reasoningEffort: ModelParams["reasoningEffort"] = undefined
 
 	if (shouldUseReasoningBudget({ model, settings })) {
+		// Check if this is a Gemini 2.5 Pro model
+		const isGemini25Pro = modelId.includes("gemini-2.5-pro")
+
 		// If `customMaxThinkingTokens` is not specified use the default.
-		reasoningBudget = customMaxThinkingTokens ?? DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
+		// For Gemini 2.5 Pro, default to 128 instead of 8192
+		const defaultThinkingTokens = isGemini25Pro
+			? GEMINI_25_PRO_MIN_THINKING_TOKENS
+			: DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
+		reasoningBudget = customMaxThinkingTokens ?? defaultThinkingTokens
 
 		// Reasoning cannot exceed 80% of the `maxTokens` value.
 		// maxTokens should always be defined for reasoning budget models, but add a guard just in case
 		if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) {
 			reasoningBudget = Math.floor(maxTokens * 0.8)
 		}
 
-		// Reasoning cannot be less than 1024 tokens.
-		if (reasoningBudget < 1024) {
-			reasoningBudget = 1024
+		// Reasoning cannot be less than minimum tokens.
+		// For Gemini 2.5 Pro models, the minimum is 128 tokens
+		// For other models, the minimum is 1024 tokens
+		const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
+		if (reasoningBudget < minThinkingTokens) {
+			reasoningBudget = minThinkingTokens
 		}
 
 		// Let's assume that "Hybrid" reasoning models require a temperature of
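
Taken together, the change above first picks a default thinking budget (128 for Gemini 2.5 Pro, 8 192 otherwise), caps it at 80% of maxTokens, then clamps it up to the per-model minimum. A minimal standalone sketch of that ordering follows; the resolveReasoningBudget helper and its flattened signature are illustrative only, not the repository's getModelParams, with the constant values copied from this commit.

// Hypothetical standalone sketch of the clamping order in this commit.
const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128
const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192

function resolveReasoningBudget(modelId: string, maxTokens: number, customMaxThinkingTokens?: number): number {
	const isGemini25Pro = modelId.includes("gemini-2.5-pro")

	// 1. Default: 128 for Gemini 2.5 Pro, 8_192 for other hybrid reasoning models.
	let budget =
		customMaxThinkingTokens ??
		(isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS)

	// 2. Cap at 80% of the model's max output tokens.
	budget = Math.min(budget, Math.floor(maxTokens * 0.8))

	// 3. Clamp up to the per-model minimum (128 vs. 1024).
	return Math.max(budget, isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024)
}

// Mirrors the first test above: a 50-token request on gemini-2.5-pro clamps up to 128.
console.log(resolveReasoningBudget("gemini-2.5-pro", 2000, 50)) // 128
console.log(resolveReasoningBudget("claude-sonnet-4", 16_384, 50)) // 1024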

src/shared/api.ts

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ export const shouldUseReasoningEffort = ({
 
 export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
 export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
+export const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128
 
 // Max Tokens
 

webview-ui/src/components/settings/ThinkingBudget.tsx

Lines changed: 13 additions & 3 deletions
@@ -3,10 +3,15 @@ import { Checkbox } from "vscrui"
 
 import { type ProviderSettings, type ModelInfo, type ReasoningEffort, reasoningEfforts } from "@roo-code/types"
 
-import { DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS, DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS } from "@roo/api"
+import {
+	DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
+	DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
+	GEMINI_25_PRO_MIN_THINKING_TOKENS,
+} from "@roo/api"
 
 import { useAppTranslation } from "@src/i18n/TranslationContext"
 import { Slider, Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui"
+import { useSelectedModel } from "@src/components/ui/hooks/useSelectedModel"
 
 interface ThinkingBudgetProps {
 	apiConfiguration: ProviderSettings
@@ -16,6 +21,11 @@ interface ThinkingBudgetProps {
 
 export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
 	const { t } = useAppTranslation()
+	const { id: selectedModelId } = useSelectedModel(apiConfiguration)
+
+	// Check if this is a Gemini 2.5 Pro model
+	const isGemini25Pro = selectedModelId && selectedModelId.includes("gemini-2.5-pro")
+	const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
 
 	const isReasoningBudgetSupported = !!modelInfo && modelInfo.supportsReasoningBudget
 	const isReasoningBudgetRequired = !!modelInfo && modelInfo.requiredReasoningBudget
@@ -81,9 +91,9 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 					<div className="font-medium">{t("settings:thinkingBudget.maxThinkingTokens")}</div>
 					<div className="flex items-center gap-1" data-testid="reasoning-budget">
 						<Slider
-							min={1024}
+							min={minThinkingTokens}
 							max={modelMaxThinkingTokens}
-							step={1024}
+							step={minThinkingTokens === 128 ? 128 : 1024}
 							value={[customMaxThinkingTokens]}
 							onValueChange={([value]) => setApiConfigurationField("modelMaxThinkingTokens", value)}
 						/>
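
In the settings UI, the slider's minimum and step therefore follow the selected model. A small sketch of the derived values under the same rule; the sliderBounds helper below is hypothetical, not part of the component.

// Hypothetical helper mirroring the Slider props above: 128/128 for Gemini 2.5 Pro, otherwise 1024/1024.
function sliderBounds(selectedModelId: string | undefined): { min: number; step: number } {
	const isGemini25Pro = !!selectedModelId && selectedModelId.includes("gemini-2.5-pro")
	const min = isGemini25Pro ? 128 : 1024
	return { min, step: min === 128 ? 128 : 1024 }
}

console.log(sliderBounds("gemini-2.5-pro-002")) // { min: 128, step: 128 }
console.log(sliderBounds("claude-3-5-sonnet-20241022")) // { min: 1024, step: 1024 }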

webview-ui/src/components/settings/__tests__/ThinkingBudget.spec.tsx

Lines changed: 70 additions & 2 deletions
@@ -7,18 +7,38 @@ import type { ModelInfo } from "@roo-code/types"
 import { ThinkingBudget } from "../ThinkingBudget"
 
 vi.mock("@/components/ui", () => ({
-	Slider: ({ value, onValueChange, min, max }: any) => (
+	Slider: ({ value, onValueChange, min, max, step }: any) => (
 		<input
 			type="range"
 			data-testid="slider"
 			min={min}
 			max={max}
+			step={step}
 			value={value[0]}
 			onChange={(e) => onValueChange([parseInt(e.target.value)])}
 		/>
 	),
 }))
 
+vi.mock("@/components/ui/hooks/useSelectedModel", () => ({
+	useSelectedModel: (apiConfiguration: any) => {
+		// Return the model ID based on apiConfiguration for testing
+		// For Gemini tests, check if apiProvider is gemini and use apiModelId
+		if (apiConfiguration?.apiProvider === "gemini") {
+			return {
+				id: apiConfiguration?.apiModelId || "gemini-2.0-flash-exp",
+				provider: "gemini",
+				info: undefined,
+			}
+		}
+		return {
+			id: apiConfiguration?.apiModelId || "claude-3-5-sonnet-20241022",
+			provider: apiConfiguration?.apiProvider || "anthropic",
+			info: undefined,
+		}
+	},
+}))
+
 describe("ThinkingBudget", () => {
 	const mockModelInfo: ModelInfo = {
 		supportsReasoningBudget: true,
@@ -103,13 +123,61 @@ describe("ThinkingBudget", () => {
 		expect(sliders[1]).toHaveValue("8000") // 80% of 10000
 	})
 
-	it("should use min thinking tokens of 1024", () => {
+	it("should use min thinking tokens of 1024 for non-Gemini models", () => {
 		render(<ThinkingBudget {...defaultProps} apiConfiguration={{ modelMaxTokens: 1000 }} />)
 
 		const sliders = screen.getAllByTestId("slider")
 		expect(sliders[1].getAttribute("min")).toBe("1024")
 	})
 
+	it("should use min thinking tokens of 128 for Gemini 2.5 Pro models", () => {
+		render(
+			<ThinkingBudget
+				{...defaultProps}
+				apiConfiguration={{
+					modelMaxTokens: 10000,
+					apiProvider: "gemini",
+					apiModelId: "gemini-2.5-pro-002",
+				}}
+			/>,
+		)
+
+		const sliders = screen.getAllByTestId("slider")
+		expect(sliders[1].getAttribute("min")).toBe("128")
+	})
+
+	it("should use step of 128 for Gemini 2.5 Pro models", () => {
+		render(
+			<ThinkingBudget
+				{...defaultProps}
+				apiConfiguration={{
+					modelMaxTokens: 10000,
+					apiProvider: "gemini",
+					apiModelId: "gemini-2.5-pro-002",
+				}}
+			/>,
+		)
+
+		const sliders = screen.getAllByTestId("slider")
+		expect(sliders[1].getAttribute("step")).toBe("128")
+	})
+
+	it("should use step of 1024 for non-Gemini models", () => {
+		render(
+			<ThinkingBudget
+				{...defaultProps}
+				apiConfiguration={{
+					modelMaxTokens: 10000,
+					apiProvider: "anthropic",
+					apiModelId: "claude-3-5-sonnet-20241022",
+				}}
+			/>,
+		)
+
+		const sliders = screen.getAllByTestId("slider")
+		expect(sliders[1].getAttribute("step")).toBe("1024")
+	})
+
 	it("should update max tokens when slider changes", () => {
 		const setApiConfigurationField = vi.fn()
 