
Commit 8729027

Fix max tokens in the task header (#3893)
1 parent fa1e7b4 commit 8729027


6 files changed: 85 additions & 18 deletions


.changeset/ten-bags-hang.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Fix max tokens in task header

src/api/transform/model-params.ts

Lines changed: 8 additions & 4 deletions
@@ -87,16 +87,20 @@ export function getModelParams({
     reasoningEffort = customReasoningEffort ?? model.reasoningEffort
   }

+  // TODO: We should consolidate this logic to compute `maxTokens` with
+  // `getModelMaxOutputTokens` in order to maintain a single source of truth.
+
+  const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))
+
   // For "Hybrid" reasoning models, we should discard the model's actual
-  // `maxTokens` value if we're not using reasoning.
-  if (model.supportsReasoningBudget && !reasoningBudget) {
+  // `maxTokens` value if we're not using reasoning. We do this for Anthropic
+  // models only for now. Should we do this for Gemini too?
+  if (model.supportsReasoningBudget && !reasoningBudget && isAnthropic) {
     maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
   }

   // For Anthropic models we should always make sure a `maxTokens` value is
   // set.
-  const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))
-
   if (!maxTokens && isAnthropic) {
     maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
   }
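For reference, a minimal standalone TypeScript sketch of the fallback this hunk implements. The 8_192 default and the Anthropic check come from the diff and its tests; the resolveMaxTokens helper, its simplified types, and the example model ids are illustrative only, not the real getModelParams signature.

// Standalone sketch of the maxTokens fallback shown above; names and types are simplified.
const ANTHROPIC_DEFAULT_MAX_TOKENS = 8_192 // assumed value of the imported constant

interface SketchModelInfo {
  maxTokens?: number
  supportsReasoningBudget?: boolean
}

function resolveMaxTokens(
  format: string, // e.g. "anthropic" or "openrouter", per the diff
  modelId: string,
  model: SketchModelInfo,
  reasoningBudget?: number,
): number | undefined {
  let maxTokens = model.maxTokens

  const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))

  // Hybrid reasoning model with reasoning disabled: clamp to the Anthropic default.
  // The diff narrows this clamp, previously applied regardless of provider, to Anthropic models only.
  if (model.supportsReasoningBudget && !reasoningBudget && isAnthropic) {
    maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
  }

  // Anthropic models should always end up with some maxTokens value.
  if (!maxTokens && isAnthropic) {
    maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
  }

  return maxTokens
}

// With reasoning off, an Anthropic hybrid model is clamped while another provider's model keeps its own limit.
console.log(resolveMaxTokens("anthropic", "claude-sonnet-4-20250514", { maxTokens: 64_000, supportsReasoningBudget: true })) // 8192
console.log(resolveMaxTokens("gemini", "gemini-2.5-flash-preview-04-17", { maxTokens: 65_535, supportsReasoningBudget: true })) // 65535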

src/shared/__tests__/api.test.ts

Lines changed: 50 additions & 11 deletions
@@ -7,8 +7,11 @@ import {
   shouldUseReasoningBudget,
   shouldUseReasoningEffort,
 } from "../api"
+import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../../api/providers/constants"

 describe("getMaxTokensForModel", () => {
+  const modelId = "test"
+
   /**
    * Testing the specific fix in commit cc79178f:
    * For thinking models, use apiConfig.modelMaxTokens if available,
@@ -27,7 +30,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4000,
     }

-    expect(getModelMaxOutputTokens({ model, settings })).toBe(4000)
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(4000)
   })

   it("should return 16_384 for thinking models when modelMaxTokens not provided", () => {
@@ -40,7 +43,7 @@ describe("getMaxTokensForModel", () => {

     const settings = {}

-    expect(getModelMaxOutputTokens({ model, settings })).toBe(16_384)
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(16_384)
   })

   it("should return 16_384 for thinking models when apiConfig is undefined", () => {
@@ -51,7 +54,7 @@ describe("getMaxTokensForModel", () => {
       maxTokens: 8000,
     }

-    expect(getModelMaxOutputTokens({ model, settings: undefined })).toBe(16_384)
+    expect(getModelMaxOutputTokens({ modelId, model, settings: undefined })).toBe(16_384)
   })

   it("should return modelInfo.maxTokens for non-thinking models", () => {
@@ -65,7 +68,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4000,
     }

-    expect(getModelMaxOutputTokens({ model, settings })).toBe(8000)
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(8000)
   })

   it("should return undefined for non-thinking models with undefined maxTokens", () => {
@@ -78,7 +81,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4000,
     }

-    expect(getModelMaxOutputTokens({ model, settings })).toBeUndefined()
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBeUndefined()
   })

   test("should return maxTokens from modelInfo when thinking is false", () => {
@@ -92,7 +95,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4096,
     }

-    const result = getModelMaxOutputTokens({ model, settings })
+    const result = getModelMaxOutputTokens({ modelId, model, settings })
     expect(result).toBe(2048)
   })

@@ -108,7 +111,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4096,
     }

-    const result = getModelMaxOutputTokens({ model, settings })
+    const result = getModelMaxOutputTokens({ modelId, model, settings })
     expect(result).toBe(4096)
   })

@@ -122,7 +125,7 @@ describe("getMaxTokensForModel", () => {

     const settings: ProviderSettings = {}

-    const result = getModelMaxOutputTokens({ model, settings: undefined })
+    const result = getModelMaxOutputTokens({ modelId, model, settings: undefined })
     expect(result).toBe(16_384)
   })

@@ -133,7 +136,7 @@ describe("getMaxTokensForModel", () => {
       maxTokens: 2048,
     }

-    expect(getModelMaxOutputTokens({ model: modelInfoOnly, settings: undefined })).toBe(2048)
+    expect(getModelMaxOutputTokens({ modelId, model: modelInfoOnly, settings: undefined })).toBe(2048)
   })

   test("should handle missing properties gracefully", () => {
@@ -147,15 +150,51 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4096,
     }

-    expect(getModelMaxOutputTokens({ model: modelInfoWithoutMaxTokens, settings })).toBe(4096)
+    expect(getModelMaxOutputTokens({ modelId, model: modelInfoWithoutMaxTokens, settings })).toBe(4096)

     const modelInfoWithoutThinking: ModelInfo = {
       contextWindow: 200_000,
       supportsPromptCache: true,
       maxTokens: 2048,
     }

-    expect(getModelMaxOutputTokens({ model: modelInfoWithoutThinking, settings: undefined })).toBe(2048)
+    expect(getModelMaxOutputTokens({ modelId, model: modelInfoWithoutThinking, settings: undefined })).toBe(2048)
+  })
+
+  test("should return ANTHROPIC_DEFAULT_MAX_TOKENS for Anthropic models that support reasoning budget but aren't using it", () => {
+    // Test case for models that support reasoning budget but enableReasoningEffort is false
+    const anthropicModelId = "claude-sonnet-4-20250514"
+    const model: ModelInfo = {
+      contextWindow: 200_000,
+      supportsPromptCache: true,
+      supportsReasoningBudget: true,
+      maxTokens: 64_000, // This should be ignored
+    }
+
+    const settings: ProviderSettings = {
+      enableReasoningEffort: false, // Not using reasoning
+    }
+
+    const result = getModelMaxOutputTokens({ modelId: anthropicModelId, model, settings })
+    expect(result).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) // Should be 8192, not 64_000
+  })
+
+  test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => {
+    // Test case for non-Anthropic models - should still use model.maxTokens
+    const geminiModelId = "gemini-2.5-flash-preview-04-17"
+    const model: ModelInfo = {
+      contextWindow: 1_048_576,
+      supportsPromptCache: false,
+      supportsReasoningBudget: true,
+      maxTokens: 65_535,
+    }
+
+    const settings: ProviderSettings = {
+      enableReasoningEffort: false, // Not using reasoning
+    }
+
+    const result = getModelMaxOutputTokens({ modelId: geminiModelId, model, settings })
+    expect(result).toBe(65_535) // Should use model.maxTokens, not ANTHROPIC_DEFAULT_MAX_TOKENS
   })
 })

src/shared/api.ts

Lines changed: 12 additions & 0 deletions
@@ -1,3 +1,4 @@
+import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../api/providers/constants"
 import { ModelInfo, ProviderName, ProviderSettings } from "../schemas"

 export type { ModelInfo, ProviderName, ProviderSettings }
@@ -1936,15 +1937,26 @@ export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
 export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192

 export const getModelMaxOutputTokens = ({
+  modelId,
   model,
   settings,
 }: {
+  modelId: string
   model: ModelInfo
   settings?: ProviderSettings
 }): number | undefined => {
   if (shouldUseReasoningBudget({ model, settings })) {
     return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
   }

+  const isAnthropicModel = modelId.includes("claude")
+
+  // For "Hybrid" reasoning models, we should discard the model's actual
+  // `maxTokens` value if we're not using reasoning. We do this for Anthropic
+  // models only for now. Should we do this for Gemini too?
+  if (model.supportsReasoningBudget && isAnthropicModel) {
+    return ANTHROPIC_DEFAULT_MAX_TOKENS
+  }
+
   return model.maxTokens ?? undefined
 }
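A call-site sketch of the updated helper, with values mirroring the new tests; the import path is assumed (the helper lives in src/shared/api.ts). Since TaskHeader.tsx feeds this helper's result into the context-window display (see the last file below), passing the model id is what lets the header show the clamped 8_192 limit for Anthropic hybrid models instead of their raw maxTokens, which appears to be the mismatch #3893 addresses.

// Sketch of calling the updated helper; import path assumed.
import { getModelMaxOutputTokens } from "./api"

// Anthropic hybrid reasoning model with reasoning disabled:
// the modelId-based check returns ANTHROPIC_DEFAULT_MAX_TOKENS (8192) instead of 64_000.
const claudeMax = getModelMaxOutputTokens({
  modelId: "claude-sonnet-4-20250514",
  model: { contextWindow: 200_000, supportsPromptCache: true, supportsReasoningBudget: true, maxTokens: 64_000 },
  settings: { enableReasoningEffort: false },
})

// A non-Anthropic model in the same situation keeps its own maxTokens.
const geminiMax = getModelMaxOutputTokens({
  modelId: "gemini-2.5-flash-preview-04-17",
  model: { contextWindow: 1_048_576, supportsPromptCache: false, supportsReasoningBudget: true, maxTokens: 65_535 },
  settings: { enableReasoningEffort: false },
})

console.log(claudeMax, geminiMax) // 8192 65535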

webview-ui/src/__tests__/ContextWindowProgress.test.tsx

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ jest.mock("@src/components/chat/TaskHeader", () => {
 // Mock useSelectedModel hook
 jest.mock("@src/components/ui/hooks/useSelectedModel", () => ({
   useSelectedModel: jest.fn(() => ({
+    id: "test",
     info: { contextWindow: 4000 },
   })),
 }))

webview-ui/src/components/chat/TaskHeader.tsx

Lines changed: 9 additions & 3 deletions
@@ -48,7 +48,7 @@ const TaskHeader = ({
 }: TaskHeaderProps) => {
   const { t } = useTranslation()
   const { apiConfiguration, currentTaskItem } = useExtensionState()
-  const { info: model } = useSelectedModel(apiConfiguration)
+  const { id: modelId, info: model } = useSelectedModel(apiConfiguration)
   const [isTaskExpanded, setIsTaskExpanded] = useState(false)

   const textContainerRef = useRef<HTMLDivElement>(null)
@@ -101,7 +101,9 @@ const TaskHeader = ({
             contextWindow={contextWindow}
             contextTokens={contextTokens || 0}
             maxTokens={
-              model ? getModelMaxOutputTokens({ model, settings: apiConfiguration }) : undefined
+              model
+                ? getModelMaxOutputTokens({ modelId, model, settings: apiConfiguration })
+                : undefined
             }
           />
           {!!totalCost && <VSCodeBadge>${totalCost.toFixed(2)}</VSCodeBadge>}
@@ -140,7 +142,11 @@ const TaskHeader = ({
               contextTokens={contextTokens || 0}
               maxTokens={
                 model
-                  ? getModelMaxOutputTokens({ model, settings: apiConfiguration })
+                  ? getModelMaxOutputTokens({
+                      modelId,
+                      model,
+                      settings: apiConfiguration,
+                    })
                   : undefined
               }
             />
