Skip to content

Commit f3b449e

Browse files
committed
Enforce Gemini's max thinking tokens limit
1 parent 679e2c7 commit f3b449e

File tree

2 files changed

+28
-28
lines changed

2 files changed

+28
-28
lines changed

src/shared/api.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,8 @@ export const vertexModels = {
492492
supportsPromptCache: false,
493493
inputPrice: 0.15,
494494
outputPrice: 0.6,
495-
thinking: true, // TODO: Max thinking budget is 24_576, so we need a new `ModelInfo` property for this.
495+
thinking: true,
496+
maxThinkingTokens: 24_576,
496497
},
497498
"gemini-2.5-flash-preview-04-17": {
498499
maxTokens: 65_535,
@@ -657,7 +658,8 @@ export const geminiModels = {
657658
supportsPromptCache: false,
658659
inputPrice: 0.15,
659660
outputPrice: 0.6,
660-
thinking: true, // TODO: Max thinking budget is 24_576, so we need a new `ModelInfo` property for this.
661+
thinking: true,
662+
maxThinkingTokens: 24_576,
661663
},
662664
"gemini-2.5-flash-preview-04-17": {
663665
maxTokens: 65_535,

webview-ui/src/components/settings/ThinkingBudget.tsx

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
import { useEffect, useMemo } from "react"
21
import { useAppTranslation } from "@/i18n/TranslationContext"
32

43
import { Slider } from "@/components/ui"
54

65
import { ApiConfiguration, ModelInfo } from "../../../../src/shared/api"
76

7+
const DEFAULT_MAX_OUTPUT_TOKENS = 16_384
8+
const DEFAULT_MAX_THINKING_TOKENS = 8_192
9+
810
interface ThinkingBudgetProps {
911
apiConfiguration: ApiConfiguration
1012
setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
@@ -13,27 +15,23 @@ interface ThinkingBudgetProps {
1315

1416
export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
1517
const { t } = useAppTranslation()
16-
const tokens = apiConfiguration?.modelMaxTokens || 16_384
17-
const tokensMin = 8192
18-
const tokensMax = modelInfo?.maxTokens || 64_000
1918

20-
// Get the appropriate thinking tokens based on provider
21-
const thinkingTokens = useMemo(() => {
22-
const value = apiConfiguration?.modelMaxThinkingTokens
23-
return value || Math.min(Math.floor(0.8 * tokens), 8192)
24-
}, [apiConfiguration, tokens])
19+
if (!modelInfo || !modelInfo.thinking || !modelInfo.maxTokens) {
20+
return null
21+
}
2522

26-
const thinkingTokensMin = 1024
27-
const thinkingTokensMax = Math.floor(0.8 * tokens)
23+
const customMaxOutputTokens = apiConfiguration.modelMaxTokens || DEFAULT_MAX_OUTPUT_TOKENS
2824

29-
useEffect(() => {
30-
if (thinkingTokens > thinkingTokensMax) {
31-
setApiConfigurationField("modelMaxThinkingTokens", thinkingTokensMax)
32-
}
33-
}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField])
25+
// Dynamically expand or shrink the max thinking budget based on the custom
26+
// max output tokens so that there's always a 20% buffer.
27+
const modelMaxThinkingTokens = modelInfo.maxThinkingTokens
28+
? Math.min(modelInfo.maxThinkingTokens, Math.floor(0.8 * customMaxOutputTokens))
29+
: Math.floor(0.8 * customMaxOutputTokens)
3430

35-
if (!modelInfo?.thinking) {
36-
return null
31+
let customMaxThinkingTokens = apiConfiguration.modelMaxThinkingTokens || DEFAULT_MAX_THINKING_TOKENS
32+
33+
if (customMaxThinkingTokens > modelMaxThinkingTokens) {
34+
customMaxThinkingTokens = modelMaxThinkingTokens
3735
}
3836

3937
return (
@@ -42,26 +40,26 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
4240
<div className="font-medium">{t("settings:thinkingBudget.maxTokens")}</div>
4341
<div className="flex items-center gap-1">
4442
<Slider
45-
min={tokensMin}
46-
max={tokensMax}
43+
min={8192}
44+
max={modelInfo.maxTokens!}
4745
step={1024}
48-
value={[tokens]}
46+
value={[customMaxOutputTokens]}
4947
onValueChange={([value]) => setApiConfigurationField("modelMaxTokens", value)}
5048
/>
51-
<div className="w-12 text-sm text-center">{tokens}</div>
49+
<div className="w-12 text-sm text-center">{customMaxOutputTokens}</div>
5250
</div>
5351
</div>
5452
<div className="flex flex-col gap-1">
5553
<div className="font-medium">{t("settings:thinkingBudget.maxThinkingTokens")}</div>
5654
<div className="flex items-center gap-1">
5755
<Slider
58-
min={thinkingTokensMin}
59-
max={thinkingTokensMax}
56+
min={1024}
57+
max={modelMaxThinkingTokens}
6058
step={1024}
61-
value={[thinkingTokens]}
59+
value={[customMaxThinkingTokens]}
6260
onValueChange={([value]) => setApiConfigurationField("modelMaxThinkingTokens", value)}
6361
/>
64-
<div className="w-12 text-sm text-center">{thinkingTokens}</div>
62+
<div className="w-12 text-sm text-center">{customMaxThinkingTokens}</div>
6563
</div>
6664
</div>
6765
</>

0 commit comments

Comments
 (0)