diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts
index 9d83f265c7..c057b5eb55 100644
--- a/src/api/providers/__tests__/anthropic-vertex.spec.ts
+++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts
@@ -705,7 +705,7 @@ describe("VertexHandler", () => {
 			expect(result.temperature).toBe(1.0)
 		})
 
-		it("does not honor custom maxTokens for non-thinking models", () => {
+		it("honors custom maxTokens for all models", () => {
 			const handler = new AnthropicVertexHandler({
 				apiKey: "test-api-key",
 				apiModelId: "claude-3-7-sonnet@20250219",
@@ -714,7 +714,7 @@ describe("VertexHandler", () => {
 			})
 
 			const result = handler.getModel()
-			expect(result.maxTokens).toBe(8192)
+			expect(result.maxTokens).toBe(8192) // Capped to model's actual maxTokens
 			expect(result.reasoningBudget).toBeUndefined()
 			expect(result.temperature).toBe(0)
 		})
diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts
index b1d0a2f6b3..1ae5390461 100644
--- a/src/api/providers/__tests__/anthropic.spec.ts
+++ b/src/api/providers/__tests__/anthropic.spec.ts
@@ -251,7 +251,7 @@ describe("AnthropicHandler", () => {
 			expect(result.temperature).toBe(1.0)
 		})
 
-		it("does not honor custom maxTokens for non-thinking models", () => {
+		it("honors custom maxTokens for all models", () => {
 			const handler = new AnthropicHandler({
 				apiKey: "test-api-key",
 				apiModelId: "claude-3-7-sonnet-20250219",
@@ -260,7 +260,7 @@ describe("AnthropicHandler", () => {
 			})
 
 			const result = handler.getModel()
-			expect(result.maxTokens).toBe(8192)
+			expect(result.maxTokens).toBe(8192) // Capped to model's actual maxTokens
 			expect(result.reasoningBudget).toBeUndefined()
 			expect(result.temperature).toBe(0)
 		})
diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts
index b4b5f29204..4810575786 100644
--- a/src/api/providers/__tests__/openai.spec.ts
+++ b/src/api/providers/__tests__/openai.spec.ts
@@ -263,14 +263,14 @@ describe("OpenAiHandler", () => {
 			expect(callArgs.max_completion_tokens).toBeUndefined()
 		})
 
-		it("should use user-configured modelMaxTokens instead of model default maxTokens", async () => {
+		it("should use user-configured modelMaxTokens but cap it to model's max capability", async () => {
 			const optionsWithUserMaxTokens: ApiHandlerOptions = {
 				...mockOptions,
 				includeMaxTokens: true,
-				modelMaxTokens: 32000, // User-configured value
+				modelMaxTokens: 32000, // User tries to set higher than model supports
 				openAiCustomModelInfo: {
 					contextWindow: 128_000,
-					maxTokens: 4096, // Model's default value (should not be used)
+					maxTokens: 4096, // Model's actual max capability
 					supportsPromptCache: false,
 				},
 			}
@@ -279,10 +279,32 @@ describe("OpenAiHandler", () => {
 			// Consume the stream to trigger the API call
 			for await (const _chunk of stream) {
 			}
-			// Assert the mockCreate was called with user-configured modelMaxTokens (32000), not model default maxTokens (4096)
+			// Assert the mockCreate was called with the model's max capability (4096), not the user's request (32000)
 			expect(mockCreate).toHaveBeenCalled()
 			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs.max_completion_tokens).toBe(32000)
+			expect(callArgs.max_completion_tokens).toBe(4096)
+		})
+
+		it("should use user-configured modelMaxTokens when it's less than model's max", async () => {
+			const optionsWithLowerUserMaxTokens: ApiHandlerOptions = {
+				...mockOptions,
+				includeMaxTokens: true,
+				modelMaxTokens: 2000, // User sets lower than model's max
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096, // Model's max capability
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithLowerMaxTokens = new OpenAiHandler(optionsWithLowerUserMaxTokens)
+			const stream = handlerWithLowerMaxTokens.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with user's setting (2000)
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.max_completion_tokens).toBe(2000)
 		})
 
 		it("should fallback to model default maxTokens when user modelMaxTokens is not set", async () => {
diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts
index ea850c47be..8d3d05a8c5 100644
--- a/src/api/providers/__tests__/openrouter.spec.ts
+++ b/src/api/providers/__tests__/openrouter.spec.ts
@@ -89,7 +89,7 @@ describe("OpenRouterHandler", () => {
 			expect(result.info.supportsPromptCache).toBe(true)
 		})
 
-		it("honors custom maxTokens for thinking models", async () => {
+		it("honors custom maxTokens for all models", async () => {
 			const handler = new OpenRouterHandler({
 				openRouterApiKey: "test-key",
 				openRouterModelId: "anthropic/claude-3.7-sonnet:thinking",
@@ -98,12 +98,12 @@ describe("OpenRouterHandler", () => {
 			})
 
 			const result = await handler.fetchModel()
-			expect(result.maxTokens).toBe(128000) // Use actual implementation value
+			expect(result.maxTokens).toBe(32_768) // Not capped since model supports 128000
 			expect(result.reasoningBudget).toBeUndefined() // Use actual implementation value
 			expect(result.temperature).toBe(0) // Use actual implementation value
 		})
 
-		it("does not honor custom maxTokens for non-thinking models", async () => {
+		it("honors custom maxTokens for non-thinking models", async () => {
 			const handler = new OpenRouterHandler({
 				...mockOptions,
 				modelMaxTokens: 32_768,
@@ -111,7 +111,7 @@ describe("OpenRouterHandler", () => {
 			})
 
 			const result = await handler.fetchModel()
-			expect(result.maxTokens).toBe(8192)
+			expect(result.maxTokens).toBe(8192) // Capped to model's actual maxTokens
 			expect(result.reasoningBudget).toBeUndefined()
 			expect(result.temperature).toBe(0)
 		})
diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts
index 62121bd19d..db32610dce 100644
--- a/src/api/providers/chutes.ts
+++ b/src/api/providers/chutes.ts
@@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { getModelMaxOutputTokens } from "../../shared/api"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { convertToR1Format } from "../transform/r1-format"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -27,15 +28,17 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
-		const {
-			id: model,
-			info: { maxTokens: max_tokens },
-		} = this.getModel()
+		const model = this.getModel()
+		const max_tokens = getModelMaxOutputTokens({
+			modelId: model.id,
+			model: model.info,
+			settings: this.options as any,
+		})
 
-		const temperature = this.options.modelTemperature ?? this.getModel().info.temperature
+		const temperature = this.options.modelTemperature ?? model.info.temperature
 
 		return {
-			model,
+			model: model.id,
 			max_tokens,
 			temperature,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts
index 6765c8676d..6ab441735f 100644
--- a/src/api/providers/gemini.ts
+++ b/src/api/providers/gemini.ts
@@ -71,7 +71,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			systemInstruction,
 			httpOptions: this.options.googleGeminiBaseUrl ? { baseUrl: this.options.googleGeminiBaseUrl } : undefined,
 			thinkingConfig,
-			maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined,
+			maxOutputTokens: maxTokens ?? undefined,
 			temperature: this.options.modelTemperature ?? 0,
 		}
 
diff --git a/src/api/providers/glama.ts b/src/api/providers/glama.ts
index 774d615709..eda7acf41b 100644
--- a/src/api/providers/glama.ts
+++ b/src/api/providers/glama.ts
@@ -5,7 +5,7 @@ import OpenAI from "openai"
 import { glamaDefaultModelId, glamaDefaultModelInfo, GLAMA_DEFAULT_TEMPERATURE } from "@roo-code/types"
 
 import { Package } from "../../shared/package"
-import { ApiHandlerOptions } from "../../shared/api"
+import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
 
 import { ApiStream } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -49,12 +49,14 @@ export class GlamaHandler extends RouterProvider implements SingleCompletionHand
 			addCacheBreakpoints(systemPrompt, openAiMessages)
 		}
 
-		// Required by Anthropic; other providers default to max tokens allowed.
-		let maxTokens: number | undefined
-
-		if (modelId.startsWith("anthropic/")) {
-			maxTokens = info.maxTokens ?? undefined
-		}
+		// Required by Anthropic; others default to the max allowed. Honors the user's custom max tokens setting.
+		const maxTokens = modelId.startsWith("anthropic/")
+			? getModelMaxOutputTokens({
+					modelId,
+					model: info,
+					settings: this.options as any,
+				})
+			: undefined
 
 		const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
 			model: modelId,
@@ -130,7 +132,11 @@ export class GlamaHandler extends RouterProvider implements SingleCompletionHand
 			}
 
 			if (modelId.startsWith("anthropic/")) {
-				requestOptions.max_tokens = info.maxTokens
+				requestOptions.max_tokens = getModelMaxOutputTokens({
+					modelId,
+					model: info,
+					settings: this.options as any,
+				})
 			}
 
 			const response = await this.client.chat.completions.create(requestOptions)
diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts
index e8cd58b12c..cbdb3d86b6 100644
--- a/src/api/providers/lite-llm.ts
+++ b/src/api/providers/lite-llm.ts
@@ -5,7 +5,7 @@ import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
 
 import { calculateApiCostOpenAI } from "../../shared/cost"
 
-import { ApiHandlerOptions } from "../../shared/api"
+import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
 
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -44,8 +44,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 			...convertToOpenAiMessages(messages),
 		]
 
-		// Required by some providers; others default to max tokens allowed
-		let maxTokens: number | undefined = info.maxTokens ?? undefined
+		// Required by some providers; others default to the max allowed. Honors the user's custom max tokens setting.
+		const maxTokens = getModelMaxOutputTokens({
+			modelId,
+			model: info,
+			settings: this.options as any,
+		})
 
 		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
@@ -119,7 +123,11 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 				requestOptions.temperature = this.options.modelTemperature ?? 0
 			}
 
-			requestOptions.max_tokens = info.maxTokens
+			requestOptions.max_tokens = getModelMaxOutputTokens({
+				modelId,
+				model: info,
+				settings: this.options as any,
+			})
 
 			const response = await this.client.chat.completions.create(requestOptions)
 			return response.choices[0]?.message.content || ""
diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts
index 7d48b9ef01..62d4065ef9 100644
--- a/src/api/providers/mistral.ts
+++ b/src/api/providers/mistral.ts
@@ -3,7 +3,7 @@ import { Mistral } from "@mistralai/mistralai"
 
 import { type MistralModelId, mistralDefaultModelId, mistralModels, MISTRAL_DEFAULT_TEMPERATURE } from "@roo-code/types"
 
-import { ApiHandlerOptions } from "../../shared/api"
+import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
 
 import { convertToMistralMessages } from "../transform/mistral-format"
 import { ApiStream } from "../transform/stream"
@@ -78,7 +78,13 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand
 		const info = mistralModels[id as MistralModelId] ?? mistralModels[mistralDefaultModelId]
 
 		// @TODO: Move this to the `getModelParams` function.
-		const maxTokens = this.options.includeMaxTokens ? info.maxTokens : undefined
+		const maxTokens = this.options.includeMaxTokens
+			? getModelMaxOutputTokens({
+					modelId: id,
+					model: info,
+					settings: { ...this.options, apiProvider: "mistral" } as any,
+				})
+			: undefined
 		const temperature = this.options.modelTemperature ?? MISTRAL_DEFAULT_TEMPERATURE
 
 		return { id, info, maxTokens, temperature }
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index f5e4e4c985..3be3f679f9 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -10,7 +10,7 @@ import {
 	OPENAI_AZURE_AI_INFERENCE_PATH,
 } from "@roo-code/types"
 
-import type { ApiHandlerOptions } from "../../shared/api"
+import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
 
 import { XmlMatcher } from "../../utils/xml-matcher"
 
@@ -403,9 +403,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 	): void {
 		// Only add max_completion_tokens if includeMaxTokens is true
 		if (this.options.includeMaxTokens === true) {
-			// Use user-configured modelMaxTokens if available, otherwise fall back to model's default maxTokens
+			// Use getModelMaxOutputTokens to properly handle user settings and model limits
 			// Using max_completion_tokens as max_tokens is deprecated
-			requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
+			const modelId = this.options.openAiModelId ?? ""
+			requestOptions.max_completion_tokens = getModelMaxOutputTokens({
+				modelId,
+				model: modelInfo,
+				settings: { ...this.options, apiProvider: "openai" } as any,
+			})
 		}
 	}
 }
diff --git a/src/api/providers/unbound.ts b/src/api/providers/unbound.ts
index bc85dfd499..ee2e50a529 100644
--- a/src/api/providers/unbound.ts
+++ b/src/api/providers/unbound.ts
@@ -4,6 +4,7 @@ import OpenAI from "openai"
 import { unboundDefaultModelId, unboundDefaultModelInfo } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { getModelMaxOutputTokens } from "../../shared/api"
 
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -76,12 +77,14 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa
 			addVertexCacheBreakpoints(messages)
 		}
 
-		// Required by Anthropic; other providers default to max tokens allowed.
-		let maxTokens: number | undefined
-
-		if (modelId.startsWith("anthropic/")) {
-			maxTokens = info.maxTokens ?? undefined
-		}
+		// Required by Anthropic; others default to the max allowed. Honors the user's custom max tokens setting.
+		const maxTokens = modelId.startsWith("anthropic/")
+			? getModelMaxOutputTokens({
+					modelId,
+					model: info,
+					settings: this.options as any,
+				})
+			: undefined
 
 		const requestOptions: UnboundChatCompletionCreateParamsStreaming = {
 			model: modelId.split("/")[1],
@@ -149,7 +152,11 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa
 			}
 
 			if (modelId.startsWith("anthropic/")) {
-				requestOptions.max_tokens = info.maxTokens
+				requestOptions.max_tokens = getModelMaxOutputTokens({
+					modelId,
+					model: info,
+					settings: this.options as any,
+				})
 			}
 
 			const response = await this.client.chat.completions.create(requestOptions, { headers: DEFAULT_HEADERS })
diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts
index 596c9e89b8..c1140804a2 100644
--- a/src/api/providers/xai.ts
+++ b/src/api/providers/xai.ts
@@ -3,7 +3,7 @@ import OpenAI from "openai"
 
 import { type XAIModelId, xaiDefaultModelId, xaiModels } from "@roo-code/types"
 
-import type { ApiHandlerOptions } from "../../shared/api"
+import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
 
 import { ApiStream } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -50,7 +50,11 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 		// Use the OpenAI-compatible API.
 		const stream = await this.client.chat.completions.create({
 			model: modelId,
-			max_tokens: modelInfo.maxTokens,
+			max_tokens: getModelMaxOutputTokens({
+				modelId,
+				model: modelInfo,
+				settings: { ...this.options, apiProvider: "xai" } as any,
+			}),
 			temperature: this.options.modelTemperature ?? XAI_DEFAULT_TEMPERATURE,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
@@ -78,12 +82,15 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 			if (chunk.usage) {
 				// Extract detailed token information if available
 				// First check for prompt_tokens_details structure (real API response)
-				const promptDetails = "prompt_tokens_details" in chunk.usage ? chunk.usage.prompt_tokens_details : null;
-				const cachedTokens = promptDetails && "cached_tokens" in promptDetails ? promptDetails.cached_tokens : 0;
+				const promptDetails = "prompt_tokens_details" in chunk.usage ? chunk.usage.prompt_tokens_details : null
+				const cachedTokens = promptDetails && "cached_tokens" in promptDetails ? promptDetails.cached_tokens : 0
 
 				// Fall back to direct fields in usage (used in test mocks)
-				const readTokens = cachedTokens || ("cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0);
-				const writeTokens = "cache_creation_input_tokens" in chunk.usage ? (chunk.usage as any).cache_creation_input_tokens : 0;
+				const readTokens =
+					cachedTokens ||
+					("cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0)
+				const writeTokens =
+					"cache_creation_input_tokens" in chunk.usage ? (chunk.usage as any).cache_creation_input_tokens : 0
 
 				yield {
 					type: "usage",
diff --git a/src/api/transform/__tests__/model-params.spec.ts b/src/api/transform/__tests__/model-params.spec.ts
index c29d17559e..9fcba2959d 100644
--- a/src/api/transform/__tests__/model-params.spec.ts
+++ b/src/api/transform/__tests__/model-params.spec.ts
@@ -665,7 +665,7 @@ describe("getModelParams", () => {
 				model,
 			})
 
-			expect(result.maxTokens).toBe(20000)
+			expect(result.maxTokens).toBe(16000) // Capped to model's actual maxTokens
 			expect(result.reasoningBudget).toBe(10000)
 			expect(result.temperature).toBe(1.0) // Overridden for reasoning budget models
 			expect(result.reasoningEffort).toBeUndefined() // Budget takes precedence
diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts
index 08d4bdf3bb..f75568b8a0 100644
--- a/src/shared/__tests__/api.spec.ts
+++ b/src/shared/__tests__/api.spec.ts
@@ -73,7 +73,8 @@ describe("getModelMaxOutputTokens", () => {
 			settings,
 		})
 
-		expect(result).toBe(32000)
+		// Should cap to model's maxTokens (8192) even though user set 32000
+		expect(result).toBe(8192)
 	})
 
 	test("should return default of 8192 when maxTokens is undefined", () => {
@@ -154,6 +155,93 @@ describe("getModelMaxOutputTokens", () => {
 
 		expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
 	})
+
+	test("should cap user-configured modelMaxTokens to the model's max for non-reasoning models", () => {
+		const settings: ProviderSettings = {
+			modelMaxTokens: 16000,
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "gpt-4",
+			model: mockModel,
+			settings,
+		})
+
+		// Should cap to model's maxTokens (8192) even though user set 16000
+		expect(result).toBe(8192)
+	})
+
+	test("should ignore modelMaxTokens when it's 0 or negative", () => {
+		// Non-positive values count as "not configured"; exercise both cases the title names.
+		for (const modelMaxTokens of [0, -1]) {
+			const settings: ProviderSettings = { modelMaxTokens }
+			const result = getModelMaxOutputTokens({
+				modelId: "claude-3-5-sonnet",
+				model: mockModel,
+				settings,
+			})
+
+			// Should fall back to model's maxTokens
+			expect(result).toBe(8192)
+		}
+	})
+
+	test("should prioritize user-configured modelMaxTokens over model's default", () => {
+		const modelWithHighMaxTokens: ModelInfo = {
+			maxTokens: 64000,
+			contextWindow: 200000,
+			supportsPromptCache: true,
+		}
+
+		const settings: ProviderSettings = {
+			modelMaxTokens: 32000,
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "some-model",
+			model: modelWithHighMaxTokens,
+			settings,
+		})
+
+		expect(result).toBe(32000)
+	})
+
+	test("should cap modelMaxTokens to model's max even for Anthropic models", () => {
+		const settings: ProviderSettings = {
+			modelMaxTokens: 20000,
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "claude-3-5-sonnet",
+			model: mockModel,
+			settings,
+			format: "anthropic",
+		})
+
+		// Should cap to model's maxTokens (8192) even though user set 20000
+		expect(result).toBe(8192)
+	})
+
+	test("should cap modelMaxTokens to model's actual max when user sets higher value", () => {
+		const model: ModelInfo = {
+			maxTokens: 16000,
+			contextWindow: 100000,
+			supportsPromptCache: true,
+		}
+
+		const settings: ProviderSettings = {
+			modelMaxTokens: 32000, // Higher than model's max
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "some-model",
+			model,
+			settings,
+		})
+
+		// Should cap at model's max
+		expect(result).toBe(16000)
+	})
 })
 
 describe("shouldUseReasoningBudget", () => {
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 8cbfc72133..f05e8c46a7 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -70,8 +70,15 @@ export const getModelMaxOutputTokens = ({
 		return settings.claudeCodeMaxOutputTokens || CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
 	}
 
+	// A positive user-configured modelMaxTokens overrides the logic below, capped at the model's maxTokens when known
+	if (settings?.modelMaxTokens && settings.modelMaxTokens > 0) {
+		const maxAllowed = model?.maxTokens || Number.MAX_SAFE_INTEGER
+		return Math.min(settings.modelMaxTokens, maxAllowed)
+	}
+
+	// Existing reasoning budget logic
 	if (shouldUseReasoningBudget({ model, settings })) {
-		return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
+		return DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
 	}
 
 	const isAnthropicContext =
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 06994b16b9..788219e8b8 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -81,6 +81,7 @@ import { DiffSettingsControl } from "./DiffSettingsControl"
 import { TemperatureControl } from "./TemperatureControl"
 import { RateLimitSecondsControl } from "./RateLimitSecondsControl"
 import { ConsecutiveMistakeLimitControl } from "./ConsecutiveMistakeLimitControl"
+import { MaxTokensSlider } from "./MaxTokensSlider"
 import { BedrockCustomArn } from "./providers/BedrockCustomArn"
 import { buildDocLink } from "@src/utils/docLinks"
 
@@ -574,6 +575,13 @@ const ApiOptions = ({
 							onChange={handleInputChange("modelTemperature", noTransform)}
 							maxValue={2}
 						/>
+						{selectedProvider !== "openai" && (
+							<MaxTokensSlider
+								value={apiConfiguration.modelMaxTokens}
+								onChange={(value) => setApiConfigurationField("modelMaxTokens", value)}
+								modelInfo={selectedModelInfo}
+							/>
+						)}
 						<RateLimitSecondsControl
 							value={apiConfiguration.rateLimitSeconds || 0}
 							onChange={(value) => setApiConfigurationField("rateLimitSeconds", value)}
diff --git a/webview-ui/src/components/settings/MaxTokensSlider.tsx b/webview-ui/src/components/settings/MaxTokensSlider.tsx
new file mode 100644
index 0000000000..b34e8de82c
--- /dev/null
+++ b/webview-ui/src/components/settings/MaxTokensSlider.tsx
@@ -0,0 +1,58 @@
+import React from "react"
+import { useAppTranslation } from "@/i18n/TranslationContext"
+import { ModelInfo } from "@roo-code/types"
+import { Slider } from "@/components/ui"
+import { DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS } from "@roo/api"
+
+/** Props for {@link MaxTokensSlider}. */
+interface MaxTokensSliderProps {
+	/** Currently configured max output tokens; a falsy value shows the default. */
+	value?: number
+	/** Called with the newly selected token count when the slider moves. */
+	onChange: (value: number | undefined) => void
+	/** Selected model's metadata; its `maxTokens` widens the slider range when set. */
+	modelInfo?: ModelInfo
+	/** Extra class names appended to the root element. */
+	className?: string
+}
+
+/**
+ * Slider for the `modelMaxTokens` setting (maximum output tokens per response).
+ *
+ * The range starts at 8192 and extends to the largest of the model's
+ * `maxTokens`, the current value, and DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS.
+ */
+export const MaxTokensSlider: React.FC<MaxTokensSliderProps> = ({ value, onChange, modelInfo, className }) => {
+	const { t } = useAppTranslation()
+
+	// An unset (or 0) value falls back to the default so the slider always has a position.
+	const customMaxOutputTokens = value || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
+	// `maxTokens` may be missing on ModelInfo; `|| 0` makes it a no-op inside Math.max.
+	const modelMaxTokens = modelInfo?.maxTokens || 0
+	const maxValue = Math.max(modelMaxTokens, customMaxOutputTokens, DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS)
+
+	return (
+		<div className={`flex flex-col gap-1 ${className || ""}`}>
+			<div className="font-medium">{t("settings:providers.maxOutputTokens.label")}</div>
+			<div className="flex items-center gap-1">
+				<Slider
+					min={8192}
+					max={maxValue}
+					step={1024}
+					value={[customMaxOutputTokens]}
+					onValueChange={([next]) => onChange(next)}
+				/>
+				<div className="w-12 text-sm text-center">{customMaxOutputTokens}</div>
+			</div>
+			<div className="text-sm text-vscode-descriptionForeground">
+				{t("settings:providers.maxOutputTokens.description")}
+			</div>
+			{/* Only advertise the model limit when one is known, to avoid "up to undefined tokens". */}
+			{modelMaxTokens > 0 && (
+				<div className="text-sm text-vscode-descriptionForeground">
+					{t("settings:providers.maxOutputTokens.modelSupports", { max: modelMaxTokens })}
+				</div>
+			)}
+		</div>
+	)
+}
diff --git a/webview-ui/src/components/settings/ThinkingBudget.tsx b/webview-ui/src/components/settings/ThinkingBudget.tsx
index 0adb62f2a0..d37e24db5e 100644
--- a/webview-ui/src/components/settings/ThinkingBudget.tsx
+++ b/webview-ui/src/components/settings/ThinkingBudget.tsx
@@ -59,38 +59,19 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 				</div>
 			)}
 			{(isReasoningBudgetRequired || enableReasoningEffort) && (
-				<>
-					<div className="flex flex-col gap-1">
-						<div className="font-medium">{t("settings:thinkingBudget.maxTokens")}</div>
-						<div className="flex items-center gap-1">
-							<Slider
-								min={8192}
-								max={Math.max(
-									modelInfo.maxTokens || 8192,
-									customMaxOutputTokens,
-									DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
-								)}
-								step={1024}
-								value={[customMaxOutputTokens]}
-								onValueChange={([value]) => setApiConfigurationField("modelMaxTokens", value)}
-							/>
-							<div className="w-12 text-sm text-center">{customMaxOutputTokens}</div>
-						</div>
-					</div>
-					<div className="flex flex-col gap-1">
-						<div className="font-medium">{t("settings:thinkingBudget.maxThinkingTokens")}</div>
-						<div className="flex items-center gap-1" data-testid="reasoning-budget">
-							<Slider
-								min={1024}
-								max={modelMaxThinkingTokens}
-								step={1024}
-								value={[customMaxThinkingTokens]}
-								onValueChange={([value]) => setApiConfigurationField("modelMaxThinkingTokens", value)}
-							/>
-							<div className="w-12 text-sm text-center">{customMaxThinkingTokens}</div>
-						</div>
+				<div className="flex flex-col gap-1">
+					<div className="font-medium">{t("settings:thinkingBudget.maxThinkingTokens")}</div>
+					<div className="flex items-center gap-1" data-testid="reasoning-budget">
+						<Slider
+							min={1024}
+							max={modelMaxThinkingTokens}
+							step={1024}
+							value={[Math.min(customMaxThinkingTokens, modelMaxThinkingTokens)]}
+							onValueChange={([value]) => setApiConfigurationField("modelMaxThinkingTokens", value)}
+						/>
+						<div className="w-12 text-sm text-center">{customMaxThinkingTokens}</div>
 					</div>
-				</>
+				</div>
 			)}
 		</>
 	) : isReasoningEffortSupported ? (
diff --git a/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx b/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx
index bf02840429..6beb192035 100644
--- a/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx
+++ b/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx
@@ -83,9 +83,21 @@ vi.mock("@/components/ui", () => ({
 	Popover: ({ children, _open, _onOpenChange }: any) => <div className="popover-mock">{children}</div>,
 	PopoverContent: ({ children, _className }: any) => <div className="popover-content-mock">{children}</div>,
 	PopoverTrigger: ({ children, _asChild }: any) => <div className="popover-trigger-mock">{children}</div>,
-	Slider: ({ value, onChange }: any) => (
+	Slider: ({ value, onChange, onValueChange, min, max, step }: any) => (
 		<div data-testid="slider">
-			<input type="range" value={value || 0} onChange={(e) => onChange(parseFloat(e.target.value))} />
+			<input
+				type="range"
+				min={min}
+				max={max}
+				step={step}
+				value={value?.[0] || value || 0}
+				onChange={(e) => {
+					const val = parseFloat(e.target.value)
+					if (onChange) onChange(val)
+					if (onValueChange) onValueChange([val])
+				}}
+				data-testid="slider-input"
+			/>
 		</div>
 	),
 	SearchableSelect: ({ value, onValueChange, options, placeholder, "data-testid": dataTestId }: any) => (
@@ -114,6 +126,19 @@ vi.mock("@/components/ui", () => ({
 	CollapsibleContent: ({ children, className }: any) => (
 		<div className={`collapsible-content-mock ${className || ""}`}>{children}</div>
 	),
+	// Add Input component for other controls
+	Input: ({ id, type, value, onChange, min, max, className, ...props }: any) => (
+		<input
+			id={id}
+			type={type}
+			value={value}
+			onChange={onChange}
+			min={min}
+			max={max}
+			className={className}
+			{...props}
+		/>
+	),
 }))
 
 vi.mock("../TemperatureControl", () => ({
diff --git a/webview-ui/src/components/settings/__tests__/MaxTokensSlider.spec.tsx b/webview-ui/src/components/settings/__tests__/MaxTokensSlider.spec.tsx
new file mode 100644
index 0000000000..e9dbf6cd42
--- /dev/null
+++ b/webview-ui/src/components/settings/__tests__/MaxTokensSlider.spec.tsx
@@ -0,0 +1,107 @@
+import React from "react"
+import { render, screen, fireEvent } from "@testing-library/react"
+import { describe, it, expect, vi, beforeEach } from "vitest"
+import { MaxTokensSlider } from "../MaxTokensSlider"
+import { ModelInfo } from "@roo-code/types"
+
+// Mock the translation hook: fixed English strings for the keys under test, any other key is echoed back
+vi.mock("@/i18n/TranslationContext", () => ({
+	useAppTranslation: () => ({
+		t: (key: string, params?: any) => {
+			if (key === "settings:providers.maxOutputTokens.label") return "Max Output Tokens"
+			if (key === "settings:providers.maxOutputTokens.description") return "Maximum number of tokens"
+			if (key === "settings:providers.maxOutputTokens.modelSupports")
+				return `Model supports up to ${params?.max} tokens`
+			return key
+		},
+	}),
+}))
+
+// Mock Slider as a native range input; value and onValueChange use a one-element array
+vi.mock("@/components/ui", () => ({
+	Slider: ({ value, onValueChange, min, max, step }: any) => (
+		<div data-testid="slider">
+			<input
+				type="range"
+				min={min}
+				max={max}
+				step={step}
+				value={value[0]}
+				onChange={(e) => onValueChange([parseInt(e.target.value, 10)])}
+				data-testid="slider-input"
+			/>
+		</div>
+	),
+}))
+
+describe("MaxTokensSlider", () => {
+	const mockOnChange = vi.fn()
+
+	beforeEach(() => {
+		mockOnChange.mockClear()
+	})
+
+	it("renders with default value", () => {
+		render(<MaxTokensSlider onChange={mockOnChange} />)
+
+		expect(screen.getByText("Max Output Tokens")).toBeInTheDocument()
+		expect(screen.getByText("Maximum number of tokens")).toBeInTheDocument()
+		// Default value is DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS which is 16384
+		expect(screen.getByText("16384")).toBeInTheDocument()
+	})
+
+	it("renders with custom value", () => {
+		render(<MaxTokensSlider value={8192} onChange={mockOnChange} />)
+
+		expect(screen.getByText("8192")).toBeInTheDocument() // differs from the 16384 default, so the prop is exercised
+	})
+
+	it("shows model support information when modelInfo is provided", () => {
+		const modelInfo: ModelInfo = {
+			maxTokens: 32768,
+			contextWindow: 100000,
+			supportsPromptCache: false,
+		}
+
+		render(<MaxTokensSlider onChange={mockOnChange} modelInfo={modelInfo} />)
+
+		expect(screen.getByText("Model supports up to 32768 tokens")).toBeInTheDocument()
+	})
+
+	it("calls onChange when slider value changes", () => {
+		render(<MaxTokensSlider value={8192} onChange={mockOnChange} />)
+
+		const slider = screen.getByTestId("slider-input") as HTMLInputElement
+		// Simulate the user moving the slider from 8192 to 16384
+		fireEvent.change(slider, { target: { value: "16384" } })
+
+		expect(mockOnChange).toHaveBeenCalledWith(16384)
+	})
+
+	it("calculates max value correctly based on model info", () => {
+		const modelInfo: ModelInfo = {
+			maxTokens: 65536,
+			contextWindow: 100000,
+			supportsPromptCache: false,
+		}
+
+		render(<MaxTokensSlider value={8192} onChange={mockOnChange} modelInfo={modelInfo} />)
+
+		const slider = screen.getByTestId("slider-input") as HTMLInputElement
+		expect(slider.max).toBe("65536")
+	})
+
+	it("uses default max value when model info is not provided", () => {
+		render(<MaxTokensSlider value={8192} onChange={mockOnChange} />)
+
+		const slider = screen.getByTestId("slider-input") as HTMLInputElement
+		// When no model info, max is Math.max(value, DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS)
+		expect(slider.max).toBe("16384")
+	})
+
+	it("applies custom className", () => {
+		const { container } = render(<MaxTokensSlider onChange={mockOnChange} className="custom-class" />)
+
+		expect(container.firstChild).toHaveClass("custom-class")
+	})
+})
diff --git a/webview-ui/src/components/settings/__tests__/ThinkingBudget.spec.tsx b/webview-ui/src/components/settings/__tests__/ThinkingBudget.spec.tsx
index 5ca51b4528..44dc291fd3 100644
--- a/webview-ui/src/components/settings/__tests__/ThinkingBudget.spec.tsx
+++ b/webview-ui/src/components/settings/__tests__/ThinkingBudget.spec.tsx
@@ -57,10 +57,10 @@ describe("ThinkingBudget", () => {
 		expect(container.firstChild).toBeNull()
 	})
 
-	it("should render sliders when model supports thinking", () => {
+	it("should render slider when model supports thinking", () => {
 		render(<ThinkingBudget {...defaultProps} />)
 
-		expect(screen.getAllByTestId("slider")).toHaveLength(2)
+		expect(screen.getAllByTestId("slider")).toHaveLength(1)
 	})
 
 	it("should update modelMaxThinkingTokens", () => {
@@ -74,8 +74,8 @@ describe("ThinkingBudget", () => {
 			/>,
 		)
 
-		const sliders = screen.getAllByTestId("slider")
-		fireEvent.change(sliders[1], { target: { value: "5000" } })
+		const slider = screen.getByTestId("slider")
+		fireEvent.change(slider, { target: { value: "5000" } })
 
 		expect(setApiConfigurationField).toHaveBeenCalledWith("modelMaxThinkingTokens", 5000)
 	})
@@ -95,35 +95,39 @@ describe("ThinkingBudget", () => {
 		expect(setApiConfigurationField).toHaveBeenCalledWith("modelMaxThinkingTokens", 8000) // 80% of 10000
 	})
 
+	it("should allow max thinking tokens up to 80% of max output tokens", () => {
+		render(<ThinkingBudget {...defaultProps} apiConfiguration={{ modelMaxTokens: 5000 }} />)
+
+		const slider = screen.getByTestId("slider")
+		// Max should be 80% of 5000 = 4000
+		expect(slider.getAttribute("max")).toBe("4000")
+	})
+
 	it("should use default thinking tokens if not provided", () => {
 		render(<ThinkingBudget {...defaultProps} apiConfiguration={{ modelMaxTokens: 10000 }} />)
 
 		// Default is 80% of max tokens, capped at 8192
-		const sliders = screen.getAllByTestId("slider")
-		expect(sliders[1]).toHaveValue("8000") // 80% of 10000
+		const slider = screen.getByTestId("slider")
+		expect(slider).toHaveValue("8000") // 80% of 10000
 	})
 
 	it("should use min thinking tokens of 1024", () => {
-		render(<ThinkingBudget {...defaultProps} apiConfiguration={{ modelMaxTokens: 1000 }} />)
+		render(<ThinkingBudget {...defaultProps} apiConfiguration={{ modelMaxTokens: 3000 }} />)
 
-		const sliders = screen.getAllByTestId("slider")
-		expect(sliders[1].getAttribute("min")).toBe("1024")
+		const slider = screen.getByTestId("slider")
+		expect(slider.getAttribute("min")).toBe("1024")
 	})
 
-	it("should update max tokens when slider changes", () => {
-		const setApiConfigurationField = vi.fn()
-
+	it("should cap displayed value at 80% even if stored value is higher", () => {
 		render(
 			<ThinkingBudget
 				{...defaultProps}
-				apiConfiguration={{ modelMaxTokens: 10000 }}
-				setApiConfigurationField={setApiConfigurationField}
+				apiConfiguration={{ modelMaxTokens: 5000, modelMaxThinkingTokens: 8192 }}
 			/>,
 		)
 
-		const sliders = screen.getAllByTestId("slider")
-		fireEvent.change(sliders[0], { target: { value: "12000" } })
-
-		expect(setApiConfigurationField).toHaveBeenCalledWith("modelMaxTokens", 12000)
+		const slider = screen.getByTestId("slider")
+		// Value should be capped at 4000 (80% of 5000) even though stored value is 8192
+		expect(slider).toHaveValue("4000")
 	})
 })
diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json
index 207fc88fd9..bda230d575 100644
--- a/webview-ui/src/i18n/locales/ca/settings.json
+++ b/webview-ui/src/i18n/locales/ca/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Per defecte: claude",
 			"maxTokensLabel": "Tokens màxims de sortida",
 			"maxTokensDescription": "Nombre màxim de tokens de sortida per a les respostes de Claude Code. El valor per defecte és 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Tokens màxims de sortida",
+			"description": "Nombre màxim de tokens a reservar per a la sortida del model. El valor predeterminat és 8192.",
+			"validation": {
+				"tooHigh": "El valor excedeix el màxim del model de {{max}} tokens",
+				"tooLow": "El valor ha de ser almenys {{min}} tokens"
+			},
+			"modelSupports": "Aquest model admet fins a {{max}} tokens"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Tokens màxims",
 		"maxThinkingTokens": "Tokens de pensament màxims"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json
index 68e9f8f97d..3d08c1f99d 100644
--- a/webview-ui/src/i18n/locales/de/settings.json
+++ b/webview-ui/src/i18n/locales/de/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Standard: claude",
 			"maxTokensLabel": "Maximale Ausgabe-Tokens",
 			"maxTokensDescription": "Maximale Anzahl an Ausgabe-Tokens für Claude Code-Antworten. Standard ist 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Maximale Ausgabe-Token",
+			"description": "Maximale Anzahl von Token, die für die Modellausgabe reserviert werden sollen. Standard ist 8192.",
+			"validation": {
+				"tooHigh": "Wert überschreitet das Maximum des Modells von {{max}} Token",
+				"tooLow": "Wert muss mindestens {{min}} Token betragen"
+			},
+			"modelSupports": "Dieses Modell unterstützt bis zu {{max}} Token"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Max Tokens",
 		"maxThinkingTokens": "Max Thinking Tokens"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json
index fa1fbab13c..e4900fedf2 100644
--- a/webview-ui/src/i18n/locales/en/settings.json
+++ b/webview-ui/src/i18n/locales/en/settings.json
@@ -367,6 +367,15 @@
 			"label": "Rate limit",
 			"description": "Minimum time between API requests."
 		},
+		"maxOutputTokens": {
+			"label": "Max Output Tokens",
+			"description": "Maximum number of tokens to reserve for model output. Default is 8192.",
+			"validation": {
+				"tooHigh": "Value exceeds model's maximum of {{max}} tokens",
+				"tooLow": "Value must be at least {{min}} tokens"
+			},
+			"modelSupports": "This model supports up to {{max}} tokens"
+		},
 		"consecutiveMistakeLimit": {
 			"label": "Error & Repetition Limit",
 			"description": "Number of consecutive errors or repeated actions before showing 'Roo is having trouble' dialog",
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Max Tokens",
 		"maxThinkingTokens": "Max Thinking Tokens"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json
index 80de7042b0..48d80fd510 100644
--- a/webview-ui/src/i18n/locales/es/settings.json
+++ b/webview-ui/src/i18n/locales/es/settings.json
@@ -367,6 +367,15 @@
 			"label": "Límite de tasa",
 			"description": "Tiempo mínimo entre solicitudes de API."
 		},
+		"maxOutputTokens": {
+			"label": "Tokens máximos de salida",
+			"description": "Número máximo de tokens a reservar para la salida del modelo. El valor predeterminado es 8192.",
+			"validation": {
+				"tooHigh": "El valor excede el máximo del modelo de {{max}} tokens",
+				"tooLow": "El valor debe ser al menos {{min}} tokens"
+			},
+			"modelSupports": "Este modelo admite hasta {{max}} tokens"
+		},
 		"consecutiveMistakeLimit": {
 			"label": "Límite de errores y repeticiones",
 			"description": "Número de errores consecutivos o acciones repetidas antes de mostrar el diálogo 'Roo está teniendo problemas'",
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Tokens máximos",
 		"maxThinkingTokens": "Tokens máximos de pensamiento"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json
index e10b3ae35f..2c2346ed48 100644
--- a/webview-ui/src/i18n/locales/fr/settings.json
+++ b/webview-ui/src/i18n/locales/fr/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Défaut : claude",
 			"maxTokensLabel": "Jetons de sortie max",
 			"maxTokensDescription": "Nombre maximum de jetons de sortie pour les réponses de Claude Code. La valeur par défaut est 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Tokens de sortie maximum",
+			"description": "Nombre maximum de tokens à réserver pour la sortie du modèle. La valeur par défaut est 8192.",
+			"validation": {
+				"tooHigh": "La valeur dépasse le maximum du modèle de {{max}} tokens",
+				"tooLow": "La valeur doit être d'au moins {{min}} tokens"
+			},
+			"modelSupports": "Ce modèle prend en charge jusqu'à {{max}} tokens"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Tokens maximum",
 		"maxThinkingTokens": "Tokens de réflexion maximum"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json
index 5b64359f8f..5387175f5c 100644
--- a/webview-ui/src/i18n/locales/hi/settings.json
+++ b/webview-ui/src/i18n/locales/hi/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "डिफ़ॉल्ट: claude",
 			"maxTokensLabel": "अधिकतम आउटपुट टोकन",
 			"maxTokensDescription": "Claude Code प्रतिक्रियाओं के लिए आउटपुट टोकन की अधिकतम संख्या। डिफ़ॉल्ट 8000 है।"
+		},
+		"maxOutputTokens": {
+			"label": "अधिकतम आउटपुट टोकन",
+			"description": "मॉडल आउटपुट के लिए आरक्षित करने के लिए टोकन की अधिकतम संख्या। डिफ़ॉल्ट 8192 है।",
+			"validation": {
+				"tooHigh": "मान मॉडल के अधिकतम {{max}} टोकन से अधिक है",
+				"tooLow": "मान कम से कम {{min}} टोकन होना चाहिए"
+			},
+			"modelSupports": "यह मॉडल {{max}} टोकन तक का समर्थन करता है"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "अधिकतम tokens",
 		"maxThinkingTokens": "अधिकतम thinking tokens"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json
index 24404dec3d..41707c3707 100644
--- a/webview-ui/src/i18n/locales/id/settings.json
+++ b/webview-ui/src/i18n/locales/id/settings.json
@@ -390,6 +390,15 @@
 			"placeholder": "Default: claude",
 			"maxTokensLabel": "Token Output Maks",
 			"maxTokensDescription": "Jumlah maksimum token output untuk respons Claude Code. Default adalah 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Token Output Maksimum",
+			"description": "Jumlah maksimum token yang akan dipesan untuk output model. Default adalah 8192.",
+			"validation": {
+				"tooHigh": "Nilai melebihi maksimum model {{max}} token",
+				"tooLow": "Nilai harus minimal {{min}} token"
+			},
+			"modelSupports": "Model ini mendukung hingga {{max}} token"
 		}
 	},
 	"browser": {
@@ -683,7 +692,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Token Maksimum",
 		"maxThinkingTokens": "Token Thinking Maksimum"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json
index bacf7a40ef..72b5fb0cb4 100644
--- a/webview-ui/src/i18n/locales/it/settings.json
+++ b/webview-ui/src/i18n/locales/it/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Predefinito: claude",
 			"maxTokensLabel": "Token di output massimi",
 			"maxTokensDescription": "Numero massimo di token di output per le risposte di Claude Code. Il valore predefinito è 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Token di output massimi",
+			"description": "Numero massimo di token da riservare per l'output del modello. Il valore predefinito è 8192.",
+			"validation": {
+				"tooHigh": "Il valore supera il massimo del modello di {{max}} token",
+				"tooLow": "Il valore deve essere almeno {{min}} token"
+			},
+			"modelSupports": "Questo modello supporta fino a {{max}} token"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Token massimi",
 		"maxThinkingTokens": "Token massimi di pensiero"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json
index c588edead2..490c92c8c9 100644
--- a/webview-ui/src/i18n/locales/ja/settings.json
+++ b/webview-ui/src/i18n/locales/ja/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "デフォルト：claude",
 			"maxTokensLabel": "最大出力トークン",
 			"maxTokensDescription": "Claude Codeレスポンスの最大出力トークン数。デフォルトは8000です。"
+		},
+		"maxOutputTokens": {
+			"label": "最大出力トークン",
+			"description": "モデル出力用に予約するトークンの最大数。デフォルトは8192です。",
+			"validation": {
+				"tooHigh": "値がモデルの最大値{{max}}トークンを超えています",
+				"tooLow": "値は最低{{min}}トークンである必要があります"
+			},
+			"modelSupports": "このモデルは最大{{max}}トークンをサポートします"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "最大 tokens",
 		"maxThinkingTokens": "最大思考 tokens"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json
index 84c329ffd7..024fe6b544 100644
--- a/webview-ui/src/i18n/locales/ko/settings.json
+++ b/webview-ui/src/i18n/locales/ko/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "기본값: claude",
 			"maxTokensLabel": "최대 출력 토큰",
 			"maxTokensDescription": "Claude Code 응답의 최대 출력 토큰 수. 기본값은 8000입니다."
+		},
+		"maxOutputTokens": {
+			"label": "최대 출력 토큰",
+			"description": "모델 출력을 위해 예약할 최대 토큰 수입니다. 기본값은 8192입니다.",
+			"validation": {
+				"tooHigh": "값이 모델의 최대값 {{max}} 토큰을 초과합니다",
+				"tooLow": "값은 최소 {{min}} 토큰이어야 합니다"
+			},
+			"modelSupports": "이 모델은 최대 {{max}} 토큰을 지원합니다"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "최대 tokens",
 		"maxThinkingTokens": "최대 사고 tokens"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json
index 343008ae51..b9a239367d 100644
--- a/webview-ui/src/i18n/locales/nl/settings.json
+++ b/webview-ui/src/i18n/locales/nl/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Standaard: claude",
 			"maxTokensLabel": "Max Output Tokens",
 			"maxTokensDescription": "Maximaal aantal output-tokens voor Claude Code-reacties. Standaard is 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Maximale uitvoertokens",
+			"description": "Maximum aantal tokens om te reserveren voor modeluitvoer. Standaard is 8192.",
+			"validation": {
+				"tooHigh": "Waarde overschrijdt het maximum van het model van {{max}} tokens",
+				"tooLow": "Waarde moet minimaal {{min}} tokens zijn"
+			},
+			"modelSupports": "Dit model ondersteunt tot {{max}} tokens"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Max tokens",
 		"maxThinkingTokens": "Max denk-tokens"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json
index 2f300384e4..981fae0625 100644
--- a/webview-ui/src/i18n/locales/pl/settings.json
+++ b/webview-ui/src/i18n/locales/pl/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Domyślnie: claude",
 			"maxTokensLabel": "Maksymalna liczba tokenów wyjściowych",
 			"maxTokensDescription": "Maksymalna liczba tokenów wyjściowych dla odpowiedzi Claude Code. Domyślnie 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Maksymalne tokeny wyjściowe",
+			"description": "Maksymalna liczba tokenów do zarezerwowania dla wyjścia modelu. Domyślnie 8192.",
+			"validation": {
+				"tooHigh": "Wartość przekracza maksimum modelu {{max}} tokenów",
+				"tooLow": "Wartość musi wynosić co najmniej {{min}} tokenów"
+			},
+			"modelSupports": "Ten model obsługuje do {{max}} tokenów"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Maksymalna liczba tokenów",
 		"maxThinkingTokens": "Maksymalna liczba tokenów myślenia"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json
index 11e4d01aac..656f8ab1cb 100644
--- a/webview-ui/src/i18n/locales/pt-BR/settings.json
+++ b/webview-ui/src/i18n/locales/pt-BR/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Padrão: claude",
 			"maxTokensLabel": "Tokens de saída máximos",
 			"maxTokensDescription": "Número máximo de tokens de saída para respostas do Claude Code. O padrão é 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Tokens máximos de saída",
+			"description": "Número máximo de tokens a reservar para a saída do modelo. O padrão é 8192.",
+			"validation": {
+				"tooHigh": "O valor excede o máximo do modelo de {{max}} tokens",
+				"tooLow": "O valor deve ser pelo menos {{min}} tokens"
+			},
+			"modelSupports": "Este modelo suporta até {{max}} tokens"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Tokens máximos",
 		"maxThinkingTokens": "Tokens máximos de pensamento"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json
index 35eb9b1966..50b17808b9 100644
--- a/webview-ui/src/i18n/locales/ru/settings.json
+++ b/webview-ui/src/i18n/locales/ru/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "По умолчанию: claude",
 			"maxTokensLabel": "Макс. выходных токенов",
 			"maxTokensDescription": "Максимальное количество выходных токенов для ответов Claude Code. По умолчанию 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Максимальное количество выходных токенов",
+			"description": "Максимальное количество токенов для резервирования для вывода модели. По умолчанию 8192.",
+			"validation": {
+				"tooHigh": "Значение превышает максимум модели {{max}} токенов",
+				"tooLow": "Значение должно быть не менее {{min}} токенов"
+			},
+			"modelSupports": "Эта модель поддерживает до {{max}} токенов"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Максимум токенов",
 		"maxThinkingTokens": "Максимум токенов на размышления"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json
index 9b90c97fb3..0765ad6d10 100644
--- a/webview-ui/src/i18n/locales/tr/settings.json
+++ b/webview-ui/src/i18n/locales/tr/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Varsayılan: claude",
 			"maxTokensLabel": "Maksimum Çıktı Token sayısı",
 			"maxTokensDescription": "Claude Code yanıtları için maksimum çıktı token sayısı. Varsayılan 8000'dir."
+		},
+		"maxOutputTokens": {
+			"label": "Maksimum Çıktı Belirteci",
+			"description": "Model çıktısı için ayrılacak maksimum belirteç sayısı. Varsayılan 8192'dir.",
+			"validation": {
+				"tooHigh": "Değer, modelin maksimum {{max}} belirtecini aşıyor",
+				"tooLow": "Değer en az {{min}} belirteç olmalıdır"
+			},
+			"modelSupports": "Bu model {{max}} belirtece kadar destekler"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Maksimum token",
 		"maxThinkingTokens": "Maksimum düşünme tokeni"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json
index 19b574af36..49af60800f 100644
--- a/webview-ui/src/i18n/locales/vi/settings.json
+++ b/webview-ui/src/i18n/locales/vi/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "Mặc định: claude",
 			"maxTokensLabel": "Số token đầu ra tối đa",
 			"maxTokensDescription": "Số lượng token đầu ra tối đa cho các phản hồi của Claude Code. Mặc định là 8000."
+		},
+		"maxOutputTokens": {
+			"label": "Token đầu ra tối đa",
+			"description": "Số lượng token tối đa để dành cho đầu ra của mô hình. Mặc định là 8192.",
+			"validation": {
+				"tooHigh": "Giá trị vượt quá tối đa của mô hình là {{max}} token",
+				"tooLow": "Giá trị phải ít nhất {{min}} token"
+			},
+			"modelSupports": "Mô hình này hỗ trợ tối đa {{max}} token"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "Tokens tối đa",
 		"maxThinkingTokens": "Tokens suy nghĩ tối đa"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json
index 402de1e683..feb3bd76dc 100644
--- a/webview-ui/src/i18n/locales/zh-CN/settings.json
+++ b/webview-ui/src/i18n/locales/zh-CN/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "默认：claude",
 			"maxTokensLabel": "最大输出 Token",
 			"maxTokensDescription": "Claude Code 响应的最大输出 Token 数量。默认为 8000。"
+		},
+		"maxOutputTokens": {
+			"label": "最大输出令牌",
+			"description": "为模型输出保留的最大令牌数。默认值为 8192。",
+			"validation": {
+				"tooHigh": "值超过模型的最大值 {{max}} 个令牌",
+				"tooLow": "值必须至少为 {{min}} 个令牌"
+			},
+			"modelSupports": "此模型支持最多 {{max}} 个令牌"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "最大Token数",
 		"maxThinkingTokens": "最大思考Token数"
 	},
 	"validation": {
diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json
index 068e64d21f..86fb14e82f 100644
--- a/webview-ui/src/i18n/locales/zh-TW/settings.json
+++ b/webview-ui/src/i18n/locales/zh-TW/settings.json
@@ -386,6 +386,15 @@
 			"placeholder": "預設：claude",
 			"maxTokensLabel": "最大輸出 Token",
 			"maxTokensDescription": "Claude Code 回應的最大輸出 Token 數量。預設為 8000。"
+		},
+		"maxOutputTokens": {
+			"label": "最大輸出令牌",
+			"description": "為模型輸出保留的最大令牌數。預設值為 8192。",
+			"validation": {
+				"tooHigh": "值超過模型的最大值 {{max}} 個令牌",
+				"tooLow": "值必須至少為 {{min}} 個令牌"
+			},
+			"modelSupports": "此模型支援最多 {{max}} 個令牌"
 		}
 	},
 	"browser": {
@@ -654,7 +663,6 @@
 		}
 	},
 	"thinkingBudget": {
-		"maxTokens": "最大 token 數",
 		"maxThinkingTokens": "最大思考 token 數"
 	},
 	"validation": {