Enforce Gemini's max thinking tokens limit

cte · cte · commit f3b449e77bbe · 2025-04-18T11:46:36.000-07:00
diff --git a/src/shared/api.ts b/src/shared/api.ts
@@ -492,7 +492,8 @@ export const vertexModels = {
 		supportsPromptCache: false,
 		inputPrice: 0.15,
 		outputPrice: 0.6,
-		thinking: true, // TODO: Max thinking budget is 24_576, so we need a new `ModelInfo` property for this.
+		thinking: true,
+		maxThinkingTokens: 24_576,
 	},
 	"gemini-2.5-flash-preview-04-17": {
 		maxTokens: 65_535,
@@ -657,7 +658,8 @@ export const geminiModels = {
 		supportsPromptCache: false,
 		inputPrice: 0.15,
 		outputPrice: 0.6,
-		thinking: true, // TODO: Max thinking budget is 24_576, so we need a new `ModelInfo` property for this.
+		thinking: true,
+		maxThinkingTokens: 24_576,
 	},
 	"gemini-2.5-flash-preview-04-17": {
 		maxTokens: 65_535,
diff --git a/webview-ui/src/components/settings/ThinkingBudget.tsx b/webview-ui/src/components/settings/ThinkingBudget.tsx
@@ -1,10 +1,12 @@
-import { useEffect, useMemo } from "react"
 import { useAppTranslation } from "@/i18n/TranslationContext"
 
 import { Slider } from "@/components/ui"
 
 import { ApiConfiguration, ModelInfo } from "../../../../src/shared/api"
 
+const DEFAULT_MAX_OUTPUT_TOKENS = 16_384
+const DEFAULT_MAX_THINKING_TOKENS = 8_192
+
 interface ThinkingBudgetProps {
 	apiConfiguration: ApiConfiguration
 	setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
@@ -13,27 +15,23 @@ interface ThinkingBudgetProps {
 
 export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
 	const { t } = useAppTranslation()
-	const tokens = apiConfiguration?.modelMaxTokens || 16_384
-	const tokensMin = 8192
-	const tokensMax = modelInfo?.maxTokens || 64_000
 
-	// Get the appropriate thinking tokens based on provider
-	const thinkingTokens = useMemo(() => {
-		const value = apiConfiguration?.modelMaxThinkingTokens
-		return value || Math.min(Math.floor(0.8 * tokens), 8192)
-	}, [apiConfiguration, tokens])
+	if (!modelInfo || !modelInfo.thinking || !modelInfo.maxTokens) {
+		return null
+	}
 
-	const thinkingTokensMin = 1024
-	const thinkingTokensMax = Math.floor(0.8 * tokens)
+	const customMaxOutputTokens = apiConfiguration.modelMaxTokens || DEFAULT_MAX_OUTPUT_TOKENS
 
-	useEffect(() => {
-		if (thinkingTokens > thinkingTokensMax) {
-			setApiConfigurationField("modelMaxThinkingTokens", thinkingTokensMax)
-		}
-	}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField])
+	// Cap the thinking budget at 80% of the custom max output tokens (leaving a
+	// 20% buffer), and additionally at the model's own limit when it defines one.
+	const modelMaxThinkingTokens = modelInfo.maxThinkingTokens
+		? Math.min(modelInfo.maxThinkingTokens, Math.floor(0.8 * customMaxOutputTokens))
+		: Math.floor(0.8 * customMaxOutputTokens)
 
-	if (!modelInfo?.thinking) {
-		return null
+	let customMaxThinkingTokens = apiConfiguration.modelMaxThinkingTokens || DEFAULT_MAX_THINKING_TOKENS
+
+	if (customMaxThinkingTokens > modelMaxThinkingTokens) {
+		customMaxThinkingTokens = modelMaxThinkingTokens
 	}
 
 	return (
@@ -42,26 +40,26 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 				<div className="font-medium">{t("settings:thinkingBudget.maxTokens")}</div>
 				<div className="flex items-center gap-1">
 					<Slider
-						min={tokensMin}
-						max={tokensMax}
+						min={8192}
+						max={modelInfo.maxTokens!}
 						step={1024}
-						value={[tokens]}
+						value={[customMaxOutputTokens]}
 						onValueChange={([value]) => setApiConfigurationField("modelMaxTokens", value)}
 					/>
-					<div className="w-12 text-sm text-center">{tokens}</div>
+					<div className="w-12 text-sm text-center">{customMaxOutputTokens}</div>
 				</div>
 			</div>
 			<div className="flex flex-col gap-1">
 				<div className="font-medium">{t("settings:thinkingBudget.maxThinkingTokens")}</div>
 				<div className="flex items-center gap-1">
 					<Slider
-						min={thinkingTokensMin}
-						max={thinkingTokensMax}
+						min={1024}
+						max={modelMaxThinkingTokens}
 						step={1024}
-						value={[thinkingTokens]}
+						value={[customMaxThinkingTokens]}
 						onValueChange={([value]) => setApiConfigurationField("modelMaxThinkingTokens", value)}
 					/>
-					<div className="w-12 text-sm text-center">{thinkingTokens}</div>
+					<div className="w-12 text-sm text-center">{customMaxThinkingTokens}</div>
 				</div>
 			</div>
 		</>