Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/api/transform/__tests__/model-params.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,57 @@ describe("getModelParams", () => {
})
})

it("should clamp Gemini 2.5 Pro thinking budget to at least 128 tokens", () => {
	// A model that must always run with a reasoning budget enabled.
	const model: ModelInfo = { ...baseModel, requiredReasoningBudget: true }

	// A user-requested budget of 50 sits below the Gemini 2.5 Pro floor,
	// so it should be raised to the 128-token minimum.
	const result = getModelParams({
		modelId: "gemini-2.5-pro",
		format: "gemini" as const,
		settings: { modelMaxTokens: 2000, modelMaxThinkingTokens: 50 },
		model,
	})

	expect(result).toEqual({
		format: "gemini",
		maxTokens: 2000,
		temperature: 1.0,
		reasoningEffort: undefined,
		reasoningBudget: 128, // Gemini 2.5 Pro enforces a 128-token minimum
		reasoning: {
			thinkingBudget: 128,
			includeThoughts: true,
		},
	})
})

it("should use 128 as default thinking budget for Gemini 2.5 Pro", () => {
	// A model that must always run with a reasoning budget enabled.
	const model: ModelInfo = { ...baseModel, requiredReasoningBudget: true }

	// No modelMaxThinkingTokens supplied: Gemini 2.5 Pro should fall back
	// to its 128-token default rather than the generic hybrid default.
	const params = getModelParams({
		modelId: "google/gemini-2.5-pro",
		format: "openrouter" as const,
		settings: { modelMaxTokens: 4000 },
		model,
	})

	expect(params).toEqual({
		format: "openrouter",
		maxTokens: 4000,
		temperature: 1.0,
		reasoningEffort: undefined,
		reasoningBudget: 128, // Gemini 2.5 Pro defaults to 128
		reasoning: {
			max_tokens: 128,
		},
	})
})
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding test cases for model ID variations and edge cases:

  • Model IDs like "gemini-2.5-pro-latest" or "vertex-ai/gemini-2.5-pro"
  • The interaction between the 128 minimum and 80% rule when maxTokens is very small (e.g., 150)


it("should clamp thinking budget to at most 80% of max tokens", () => {
const model: ModelInfo = {
...baseModel,
Expand Down
19 changes: 15 additions & 4 deletions src/api/transform/model-params.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } f
import {
DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
GEMINI_25_PRO_MIN_THINKING_TOKENS,
shouldUseReasoningBudget,
shouldUseReasoningEffort,
getModelMaxOutputTokens,
Expand Down Expand Up @@ -90,18 +91,28 @@ export function getModelParams({
let reasoningEffort: ModelParams["reasoningEffort"] = undefined

if (shouldUseReasoningBudget({ model, settings })) {
// Check if this is a Gemini 2.5 Pro model
const isGemini25Pro = modelId.includes("gemini-2.5-pro")

// If `customMaxThinkingTokens` is not specified use the default.
reasoningBudget = customMaxThinkingTokens ?? DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
// For Gemini 2.5 Pro, default to 128 instead of 8192
const defaultThinkingTokens = isGemini25Pro
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make this comment more explicit about why Gemini 2.5 Pro has a different default? Perhaps mention performance or cost considerations that led to this decision?

? GEMINI_25_PRO_MIN_THINKING_TOKENS
: DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
reasoningBudget = customMaxThinkingTokens ?? defaultThinkingTokens

// Reasoning cannot exceed 80% of the `maxTokens` value.
// maxTokens should always be defined for reasoning budget models, but add a guard just in case
if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) {
reasoningBudget = Math.floor(maxTokens * 0.8)
}

// Reasoning cannot be less than 1024 tokens.
if (reasoningBudget < 1024) {
reasoningBudget = 1024
// Reasoning cannot be less than minimum tokens.
// For Gemini 2.5 Pro models, the minimum is 128 tokens
// For other models, the minimum is 1024 tokens
const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
if (reasoningBudget < minThinkingTokens) {
reasoningBudget = minThinkingTokens
}

// Let's assume that "Hybrid" reasoning models require a temperature of
Expand Down
1 change: 1 addition & 0 deletions src/shared/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ export const shouldUseReasoningEffort = ({

export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
export const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128

// Max Tokens

Expand Down
16 changes: 13 additions & 3 deletions webview-ui/src/components/settings/ThinkingBudget.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@ import { Checkbox } from "vscrui"

import { type ProviderSettings, type ModelInfo, type ReasoningEffort, reasoningEfforts } from "@roo-code/types"

import { DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS, DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS } from "@roo/api"
import {
DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
GEMINI_25_PRO_MIN_THINKING_TOKENS,
} from "@roo/api"

import { useAppTranslation } from "@src/i18n/TranslationContext"
import { Slider, Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui"
import { useSelectedModel } from "@src/components/ui/hooks/useSelectedModel"

interface ThinkingBudgetProps {
apiConfiguration: ProviderSettings
Expand All @@ -16,6 +21,11 @@ interface ThinkingBudgetProps {

export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
const { t } = useAppTranslation()
const { id: selectedModelId } = useSelectedModel(apiConfiguration)

// Check if this is a Gemini 2.5 Pro model
const isGemini25Pro = selectedModelId && selectedModelId.includes("gemini-2.5-pro")
const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024

const isReasoningBudgetSupported = !!modelInfo && modelInfo.supportsReasoningBudget
const isReasoningBudgetRequired = !!modelInfo && modelInfo.requiredReasoningBudget
Expand Down Expand Up @@ -81,9 +91,9 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
<div className="font-medium">{t("settings:thinkingBudget.maxThinkingTokens")}</div>
<div className="flex items-center gap-1" data-testid="reasoning-budget">
<Slider
min={1024}
min={minThinkingTokens}
max={modelMaxThinkingTokens}
step={1024}
step={minThinkingTokens === 128 ? 128 : 1024}
value={[customMaxThinkingTokens]}
onValueChange={([value]) => setApiConfigurationField("modelMaxThinkingTokens", value)}
/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,38 @@ import type { ModelInfo } from "@roo-code/types"
import { ThinkingBudget } from "../ThinkingBudget"

vi.mock("@/components/ui", () => ({
Slider: ({ value, onValueChange, min, max }: any) => (
Slider: ({ value, onValueChange, min, max, step }: any) => (
<input
type="range"
data-testid="slider"
min={min}
max={max}
step={step}
value={value[0]}
onChange={(e) => onValueChange([parseInt(e.target.value)])}
/>
),
}))

// Stub useSelectedModel so ThinkingBudget sees a deterministic model id
// derived from the test's apiConfiguration instead of real provider state.
vi.mock("@/components/ui/hooks/useSelectedModel", () => ({
	useSelectedModel: (apiConfiguration: any) => {
		// Gemini tests: honor the configured apiModelId, defaulting to a
		// non-2.5-Pro Gemini model so the 128-token branch is opt-in per test.
		if (apiConfiguration?.apiProvider === "gemini") {
			return {
				// `??` (not `||`) so an explicitly-set falsy id is preserved.
				id: apiConfiguration?.apiModelId ?? "gemini-2.0-flash-exp",
				provider: "gemini",
				info: undefined,
			}
		}
		// All other providers fall back to an Anthropic-style default.
		return {
			id: apiConfiguration?.apiModelId ?? "claude-3-5-sonnet-20241022",
			provider: apiConfiguration?.apiProvider ?? "anthropic",
			info: undefined,
		}
	},
}))

describe("ThinkingBudget", () => {
const mockModelInfo: ModelInfo = {
supportsReasoningBudget: true,
Expand Down Expand Up @@ -103,13 +123,61 @@ describe("ThinkingBudget", () => {
expect(sliders[1]).toHaveValue("8000") // 80% of 10000
})

it("should use min thinking tokens of 1024", () => {
it("should use min thinking tokens of 1024 for non-Gemini models", () => {
render(<ThinkingBudget {...defaultProps} apiConfiguration={{ modelMaxTokens: 1000 }} />)

const sliders = screen.getAllByTestId("slider")
expect(sliders[1].getAttribute("min")).toBe("1024")
})

it("should use min thinking tokens of 128 for Gemini 2.5 Pro models", () => {
render(
<ThinkingBudget
{...defaultProps}
apiConfiguration={{
modelMaxTokens: 10000,
apiProvider: "gemini",
apiModelId: "gemini-2.5-pro-002",
}}
/>,
)

const sliders = screen.getAllByTestId("slider")
expect(sliders[1].getAttribute("min")).toBe("128")
})

it("should use step of 128 for Gemini 2.5 Pro models", () => {
render(
<ThinkingBudget
{...defaultProps}
apiConfiguration={{
modelMaxTokens: 10000,
apiProvider: "gemini",
apiModelId: "gemini-2.5-pro-002",
}}
/>,
)

const sliders = screen.getAllByTestId("slider")
expect(sliders[1].getAttribute("step")).toBe("128")
})

it("should use step of 1024 for non-Gemini models", () => {
render(
<ThinkingBudget
{...defaultProps}
apiConfiguration={{
modelMaxTokens: 10000,
apiProvider: "anthropic",
apiModelId: "claude-3-5-sonnet-20241022",
}}
/>,
)

const sliders = screen.getAllByTestId("slider")
expect(sliders[1].getAttribute("step")).toBe("1024")
})

it("should update max tokens when slider changes", () => {
const setApiConfigurationField = vi.fn()

Expand Down
Loading