-
Notifications
You must be signed in to change notification settings - Fork 2.6k
feat: reduce Gemini 2.5 Pro minimum thinking budget to 128 #6588
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
27c9c2d
6dd2d52
2c6ccd0
1fa71fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } f | |
| import { | ||
| DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS, | ||
| DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS, | ||
| GEMINI_25_PRO_MIN_THINKING_TOKENS, | ||
| shouldUseReasoningBudget, | ||
| shouldUseReasoningEffort, | ||
| getModelMaxOutputTokens, | ||
|
|
@@ -90,18 +91,28 @@ export function getModelParams({ | |
| let reasoningEffort: ModelParams["reasoningEffort"] = undefined | ||
|
|
||
| if (shouldUseReasoningBudget({ model, settings })) { | ||
| // Check if this is a Gemini 2.5 Pro model | ||
| const isGemini25Pro = modelId.includes("gemini-2.5-pro") || modelId.includes("gemini-25-pro") | ||
|
||
|
|
||
| // If `customMaxThinkingTokens` is not specified use the default. | ||
| reasoningBudget = customMaxThinkingTokens ?? DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS | ||
| // For Gemini 2.5 Pro, default to 128 instead of 8192 | ||
| const defaultThinkingTokens = isGemini25Pro | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we make this comment more explicit about why Gemini 2.5 Pro has a different default? Perhaps mention performance or cost considerations that led to this decision? |
||
| ? GEMINI_25_PRO_MIN_THINKING_TOKENS | ||
| : DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS | ||
| reasoningBudget = customMaxThinkingTokens ?? defaultThinkingTokens | ||
|
|
||
| // Reasoning cannot exceed 80% of the `maxTokens` value. | ||
| // maxTokens should always be defined for reasoning budget models, but add a guard just in case | ||
| if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) { | ||
| reasoningBudget = Math.floor(maxTokens * 0.8) | ||
| } | ||
|
|
||
| // Reasoning cannot be less than 1024 tokens. | ||
| if (reasoningBudget < 1024) { | ||
| reasoningBudget = 1024 | ||
| // Reasoning cannot be less than minimum tokens. | ||
| // For Gemini 2.5 Pro models, the minimum is 128 tokens | ||
| // For other models, the minimum is 1024 tokens | ||
| const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024 | ||
| if (reasoningBudget < minThinkingTokens) { | ||
| reasoningBudget = minThinkingTokens | ||
| } | ||
|
|
||
| // Let's assume that "Hybrid" reasoning models require a temperature of | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider adding test cases for model ID variations and edge cases: