Skip to content

Commit f1b28d4

Browse files
committed
fix: improve Claude Code max output tokens implementation
- Extract hardcoded default value (8000) to CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS constant
- Fix truthy check to use explicit !== undefined for handling 0 value
- Add validation (min: 1, max: 200000) to claudeCodeMaxOutputTokens in zod schema
- Restore missing test coverage for shouldUseReasoningBudget and shouldUseReasoningEffort
- Update all references to use the new constant instead of hardcoded values
1 parent 73bb7cb commit f1b28d4

File tree

6 files changed

+218
-10
lines changed

6 files changed

+218
-10
lines changed

packages/types/src/provider-settings.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ const anthropicSchema = apiModelIdProviderModelSchema.extend({
8080

8181
const claudeCodeSchema = apiModelIdProviderModelSchema.extend({
8282
claudeCodePath: z.string().optional(),
83-
claudeCodeMaxOutputTokens: z.number().optional(),
83+
claudeCodeMaxOutputTokens: z.number().int().min(1).max(200000).optional(),
8484
})
8585

8686
const glamaSchema = baseProviderSettingsSchema.extend({

packages/types/src/providers/claude-code.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { anthropicModels } from "./anthropic.js"
44
// Claude Code
55
export type ClaudeCodeModelId = keyof typeof claudeCodeModels
66
export const claudeCodeDefaultModelId: ClaudeCodeModelId = "claude-sonnet-4-20250514"
7+
export const CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS = 8000
78
export const claudeCodeModels = {
89
"claude-sonnet-4-20250514": {
910
...anthropicModels["claude-sonnet-4-20250514"],

src/api/providers/claude-code.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ export class ClaudeCodeHandler extends BaseProvider implements ApiHandler {
143143
const defaultModelInfo: ModelInfo = { ...claudeCodeModels[claudeCodeDefaultModelId] }
144144

145145
// Override maxTokens with the configured value if provided
146-
if (this.options.claudeCodeMaxOutputTokens) {
146+
if (this.options.claudeCodeMaxOutputTokens !== undefined) {
147147
defaultModelInfo.maxTokens = this.options.claudeCodeMaxOutputTokens
148148
}
149149

src/integrations/claude-code/run.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import type Anthropic from "@anthropic-ai/sdk"
33
import { execa } from "execa"
44
import { ClaudeCodeMessage } from "./types"
55
import readline from "readline"
6+
import { CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS } from "@roo-code/types"
67

78
const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0)
89

@@ -142,9 +143,11 @@ function runProcess({
142143
stderr: "pipe",
143144
env: {
144145
...process.env,
145-
// Use the configured value, or the environment variable, or default to 8000
146+
// Use the configured value, or the environment variable, or default to CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
146147
CLAUDE_CODE_MAX_OUTPUT_TOKENS:
147-
maxOutputTokens?.toString() || process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || "8000",
148+
maxOutputTokens?.toString() ||
149+
process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS ||
150+
CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS.toString(),
148151
},
149152
cwd,
150153
maxBuffer: 1024 * 1024 * 1000,

src/shared/__tests__/api.spec.ts

Lines changed: 202 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { describe, test, expect } from "vitest"
2-
import { getModelMaxOutputTokens } from "../api"
2+
import { getModelMaxOutputTokens, shouldUseReasoningBudget, shouldUseReasoningEffort } from "../api"
33
import type { ModelInfo, ProviderSettings } from "@roo-code/types"
4+
import { CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types"
45

56
describe("getModelMaxOutputTokens", () => {
67
const mockModel: ModelInfo = {
@@ -38,7 +39,7 @@ describe("getModelMaxOutputTokens", () => {
3839
expect(result).toBe(8192)
3940
})
4041

41-
test("should return default 8000 when claude-code provider has no custom max tokens", () => {
42+
test("should return default CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS when claude-code provider has no custom max tokens", () => {
4243
const settings: ProviderSettings = {
4344
apiProvider: "claude-code",
4445
// No claudeCodeMaxOutputTokens set
@@ -50,7 +51,7 @@ describe("getModelMaxOutputTokens", () => {
5051
settings,
5152
})
5253

53-
expect(result).toBe(8000)
54+
expect(result).toBe(CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS)
5455
})
5556

5657
test("should handle reasoning budget models correctly", () => {
@@ -89,4 +90,202 @@ describe("getModelMaxOutputTokens", () => {
8990

9091
expect(result).toBe(20000) // 20% of 100000
9192
})
93+
94+
test("should return ANTHROPIC_DEFAULT_MAX_TOKENS for Anthropic models that support reasoning budget but aren't using it", () => {
95+
const anthropicModelId = "claude-sonnet-4-20250514"
96+
const model: ModelInfo = {
97+
contextWindow: 200_000,
98+
supportsPromptCache: true,
99+
supportsReasoningBudget: true,
100+
maxTokens: 64_000, // This should be ignored
101+
}
102+
103+
const settings: ProviderSettings = {
104+
apiProvider: "anthropic",
105+
enableReasoningEffort: false, // Not using reasoning
106+
}
107+
108+
const result = getModelMaxOutputTokens({ modelId: anthropicModelId, model, settings })
109+
expect(result).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) // Should be 8192, not 64_000
110+
})
111+
112+
test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => {
113+
const geminiModelId = "gemini-2.5-flash-preview-04-17"
114+
const model: ModelInfo = {
115+
contextWindow: 1_048_576,
116+
supportsPromptCache: false,
117+
supportsReasoningBudget: true,
118+
maxTokens: 65_535,
119+
}
120+
121+
const settings: ProviderSettings = {
122+
apiProvider: "gemini",
123+
enableReasoningEffort: false, // Not using reasoning
124+
}
125+
126+
const result = getModelMaxOutputTokens({ modelId: geminiModelId, model, settings })
127+
expect(result).toBe(65_535) // Should use model.maxTokens, not ANTHROPIC_DEFAULT_MAX_TOKENS
128+
})
129+
130+
test("should return modelMaxTokens from settings when reasoning budget is required", () => {
131+
const model: ModelInfo = {
132+
contextWindow: 200_000,
133+
supportsPromptCache: true,
134+
requiredReasoningBudget: true,
135+
maxTokens: 8000,
136+
}
137+
138+
const settings: ProviderSettings = {
139+
modelMaxTokens: 4000,
140+
}
141+
142+
expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(4000)
143+
})
144+
145+
test("should return default 16_384 for reasoning budget models when modelMaxTokens not provided", () => {
146+
const model: ModelInfo = {
147+
contextWindow: 200_000,
148+
supportsPromptCache: true,
149+
requiredReasoningBudget: true,
150+
maxTokens: 8000,
151+
}
152+
153+
const settings = {}
154+
155+
expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
156+
})
157+
})
158+
159+
describe("shouldUseReasoningBudget", () => {
160+
test("should return true when model has requiredReasoningBudget", () => {
161+
const model: ModelInfo = {
162+
contextWindow: 200_000,
163+
supportsPromptCache: true,
164+
requiredReasoningBudget: true,
165+
}
166+
167+
// Should return true regardless of settings
168+
expect(shouldUseReasoningBudget({ model })).toBe(true)
169+
expect(shouldUseReasoningBudget({ model, settings: {} })).toBe(true)
170+
expect(shouldUseReasoningBudget({ model, settings: { enableReasoningEffort: false } })).toBe(true)
171+
})
172+
173+
test("should return true when model supports reasoning budget and settings enable reasoning effort", () => {
174+
const model: ModelInfo = {
175+
contextWindow: 200_000,
176+
supportsPromptCache: true,
177+
supportsReasoningBudget: true,
178+
}
179+
180+
const settings: ProviderSettings = {
181+
enableReasoningEffort: true,
182+
}
183+
184+
expect(shouldUseReasoningBudget({ model, settings })).toBe(true)
185+
})
186+
187+
test("should return false when model supports reasoning budget but settings don't enable reasoning effort", () => {
188+
const model: ModelInfo = {
189+
contextWindow: 200_000,
190+
supportsPromptCache: true,
191+
supportsReasoningBudget: true,
192+
}
193+
194+
const settings: ProviderSettings = {
195+
enableReasoningEffort: false,
196+
}
197+
198+
expect(shouldUseReasoningBudget({ model, settings })).toBe(false)
199+
expect(shouldUseReasoningBudget({ model, settings: {} })).toBe(false)
200+
expect(shouldUseReasoningBudget({ model })).toBe(false)
201+
})
202+
203+
test("should return false when model doesn't support reasoning budget", () => {
204+
const model: ModelInfo = {
205+
contextWindow: 200_000,
206+
supportsPromptCache: true,
207+
}
208+
209+
const settings: ProviderSettings = {
210+
enableReasoningEffort: true,
211+
}
212+
213+
expect(shouldUseReasoningBudget({ model, settings })).toBe(false)
214+
expect(shouldUseReasoningBudget({ model })).toBe(false)
215+
})
216+
})
217+
218+
describe("shouldUseReasoningEffort", () => {
219+
test("should return true when model has reasoningEffort property", () => {
220+
const model: ModelInfo = {
221+
contextWindow: 200_000,
222+
supportsPromptCache: true,
223+
reasoningEffort: "medium",
224+
}
225+
226+
// Should return true regardless of settings
227+
expect(shouldUseReasoningEffort({ model })).toBe(true)
228+
expect(shouldUseReasoningEffort({ model, settings: {} })).toBe(true)
229+
expect(shouldUseReasoningEffort({ model, settings: { reasoningEffort: undefined } })).toBe(true)
230+
})
231+
232+
test("should return true when model supports reasoning effort and settings provide reasoning effort", () => {
233+
const model: ModelInfo = {
234+
contextWindow: 200_000,
235+
supportsPromptCache: true,
236+
supportsReasoningEffort: true,
237+
}
238+
239+
const settings: ProviderSettings = {
240+
reasoningEffort: "high",
241+
}
242+
243+
expect(shouldUseReasoningEffort({ model, settings })).toBe(true)
244+
})
245+
246+
test("should return false when model supports reasoning effort but settings don't provide reasoning effort", () => {
247+
const model: ModelInfo = {
248+
contextWindow: 200_000,
249+
supportsPromptCache: true,
250+
supportsReasoningEffort: true,
251+
}
252+
253+
const settings: ProviderSettings = {
254+
reasoningEffort: undefined,
255+
}
256+
257+
expect(shouldUseReasoningEffort({ model, settings })).toBe(false)
258+
expect(shouldUseReasoningEffort({ model, settings: {} })).toBe(false)
259+
expect(shouldUseReasoningEffort({ model })).toBe(false)
260+
})
261+
262+
test("should return false when model doesn't support reasoning effort", () => {
263+
const model: ModelInfo = {
264+
contextWindow: 200_000,
265+
supportsPromptCache: true,
266+
}
267+
268+
const settings: ProviderSettings = {
269+
reasoningEffort: "high",
270+
}
271+
272+
expect(shouldUseReasoningEffort({ model, settings })).toBe(false)
273+
expect(shouldUseReasoningEffort({ model })).toBe(false)
274+
})
275+
276+
test("should handle different reasoning effort values", () => {
277+
const model: ModelInfo = {
278+
contextWindow: 200_000,
279+
supportsPromptCache: true,
280+
supportsReasoningEffort: true,
281+
}
282+
283+
const settingsLow: ProviderSettings = { reasoningEffort: "low" }
284+
const settingsMedium: ProviderSettings = { reasoningEffort: "medium" }
285+
const settingsHigh: ProviderSettings = { reasoningEffort: "high" }
286+
287+
expect(shouldUseReasoningEffort({ model, settings: settingsLow })).toBe(true)
288+
expect(shouldUseReasoningEffort({ model, settings: settingsMedium })).toBe(true)
289+
expect(shouldUseReasoningEffort({ model, settings: settingsHigh })).toBe(true)
290+
})
92291
})

src/shared/api.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types"
1+
import {
2+
type ModelInfo,
3+
type ProviderSettings,
4+
ANTHROPIC_DEFAULT_MAX_TOKENS,
5+
CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS,
6+
} from "@roo-code/types"
27

38
// ApiHandlerOptions
49

@@ -60,8 +65,8 @@ export const getModelMaxOutputTokens = ({
6065
}): number | undefined => {
6166
// Check for Claude Code specific max output tokens setting
6267
if (settings?.apiProvider === "claude-code") {
63-
// Return the configured value or default to 8000
64-
return settings.claudeCodeMaxOutputTokens || 8000
68+
// Return the configured value or default to CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
69+
return settings.claudeCodeMaxOutputTokens || CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
6570
}
6671

6772
if (shouldUseReasoningBudget({ model, settings })) {

0 commit comments

Comments
 (0)