diff --git a/src/core/Cline.ts b/src/core/Cline.ts
index 51fb5265d41..870db843b76 100644
--- a/src/core/Cline.ts
+++ b/src/core/Cline.ts
@@ -1124,9 +1124,12 @@ export class Cline {
 		const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads
 
+		// Default max tokens value for thinking models when no specific value is set
+		const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
+
 		const modelInfo = this.api.getModel().info
 		const maxTokens = modelInfo.thinking
-			? this.apiConfiguration.modelMaxTokens || modelInfo.maxTokens
+			? this.apiConfiguration.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
 			: modelInfo.maxTokens
 		const contextWindow = modelInfo.contextWindow
 
 		const trimmedMessages = await truncateConversationIfNeeded({
diff --git a/webview-ui/package-lock.json b/webview-ui/package-lock.json
index 2e765133f36..b52f8ab311d 100644
--- a/webview-ui/package-lock.json
+++ b/webview-ui/package-lock.json
@@ -14151,7 +14151,6 @@
       "resolved": "https://registry.npmjs.org/jest-environment-jsdom/-/jest-environment-jsdom-29.7.0.tgz",
       "integrity": "sha512-k9iQbsf9OyOfdzWH8HDmrRT0gSIcX+FLNW7IQq94tFX0gynPwqDTW0Ho6iMVNjGz/nb+l/vW3dWM2bbLLpkbXA==",
       "dev": true,
-      "license": "MIT",
       "dependencies": {
         "@jest/environment": "^29.7.0",
         "@jest/fake-timers": "^29.7.0",
diff --git a/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx b/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx
index cf8567bf232..2a55ca97229 100644
--- a/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx
+++ b/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx
@@ -1,4 +1,4 @@
-import { getMaxTokensForModel } from "@/utils/model-utils"
+import { DEFAULT_THINKING_MODEL_MAX_TOKENS, getMaxTokensForModel } from "@/utils/model-utils"
 
 describe("getMaxTokensForModel utility from model-utils", () => {
 	test("should return maxTokens from modelInfo when thinking is false", () => {
@@ -29,7 +29,7 @@ describe("getMaxTokensForModel utility from model-utils", () => {
 		expect(result).toBe(4096)
 	})
 
-	test("should fallback to modelInfo.maxTokens when thinking is true but apiConfig.modelMaxTokens is not defined", () => {
+	test("should fallback to DEFAULT_THINKING_MODEL_MAX_TOKENS when thinking is true but apiConfig.modelMaxTokens is not defined", () => {
 		const modelInfo = {
 			maxTokens: 2048,
 			thinking: true,
@@ -38,7 +38,7 @@ describe("getMaxTokensForModel utility from model-utils", () => {
 		const apiConfig = {}
 
 		const result = getMaxTokensForModel(modelInfo, apiConfig)
-		expect(result).toBe(2048)
+		expect(result).toBe(DEFAULT_THINKING_MODEL_MAX_TOKENS)
 	})
 
 	test("should handle undefined inputs gracefully", () => {
diff --git a/webview-ui/src/utils/__tests__/model-utils.test.ts b/webview-ui/src/utils/__tests__/model-utils.test.ts
new file mode 100644
index 00000000000..3f667dc9618
--- /dev/null
+++ b/webview-ui/src/utils/__tests__/model-utils.test.ts
@@ -0,0 +1,134 @@
+/**
+ * @fileoverview Tests for token and model utility functions
+ */
+
+import {
+	getMaxTokensForModel,
+	calculateTokenDistribution,
+	ModelInfo,
+	ApiConfig,
+	DEFAULT_THINKING_MODEL_MAX_TOKENS,
+} from "../model-utils"
+
+describe("Model utility functions", () => {
+	describe("getMaxTokensForModel", () => {
+		/**
+		 * Testing the specific fix in commit cc79178f:
+		 * For thinking models, use apiConfig.modelMaxTokens if available,
+		 * otherwise fall back to 16_384 (not modelInfo.maxTokens)
+		 */
+
+		it("should return apiConfig.modelMaxTokens for thinking models when provided", () => {
+			const modelInfo: ModelInfo = {
+				thinking: true,
+				maxTokens: 8000,
+			}
+
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBe(4000)
+		})
+
+		it("should return 16_384 for thinking models when modelMaxTokens not provided", () => {
+			const modelInfo: ModelInfo = {
+				thinking: true,
+				maxTokens: 8000,
+			}
+
+			const apiConfig: ApiConfig = {}
+
+			// This tests the specific fix: now using DEFAULT_THINKING_MODEL_MAX_TOKENS instead of falling back to modelInfo.maxTokens
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBe(DEFAULT_THINKING_MODEL_MAX_TOKENS)
+		})
+
+		it("should return 16_384 for thinking models when apiConfig is undefined", () => {
+			const modelInfo: ModelInfo = {
+				thinking: true,
+				maxTokens: 8000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, undefined)).toBe(DEFAULT_THINKING_MODEL_MAX_TOKENS)
+		})
+
+		it("should return modelInfo.maxTokens for non-thinking models", () => {
+			const modelInfo: ModelInfo = {
+				thinking: false,
+				maxTokens: 8000,
+			}
+
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBe(8000)
+		})
+
+		it("should return undefined for non-thinking models with undefined maxTokens", () => {
+			const modelInfo: ModelInfo = {
+				thinking: false,
+			}
+
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBeUndefined()
+		})
+
+		it("should return undefined when modelInfo is undefined", () => {
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(undefined, apiConfig)).toBeUndefined()
+		})
+	})
+
+	describe("calculateTokenDistribution", () => {
+		it("should calculate token distribution correctly", () => {
+			const contextWindow = 10000
+			const contextTokens = 5000
+			const maxTokens = 2000
+
+			const result = calculateTokenDistribution(contextWindow, contextTokens, maxTokens)
+
+			expect(result.reservedForOutput).toBe(maxTokens)
+			expect(result.availableSize).toBe(3000) // 10000 - 5000 - 2000
+
+			// Percentages should sum to 100%
+			expect(Math.round(result.currentPercent + result.reservedPercent + result.availablePercent)).toBe(100)
+		})
+
+		it("should default to 20% of context window when maxTokens not provided", () => {
+			const contextWindow = 10000
+			const contextTokens = 5000
+
+			const result = calculateTokenDistribution(contextWindow, contextTokens)
+
+			expect(result.reservedForOutput).toBe(2000) // 20% of 10000
+			expect(result.availableSize).toBe(3000) // 10000 - 5000 - 2000
+		})
+
+		it("should handle negative or zero inputs by using positive fallbacks", () => {
+			const result = calculateTokenDistribution(-1000, -500)
+
+			expect(result.currentPercent).toBe(0)
+			expect(result.reservedPercent).toBe(0)
+			expect(result.availablePercent).toBe(0)
+			expect(result.reservedForOutput).toBe(0) // With negative inputs, both context window and tokens become 0, so 20% of 0 is 0
+			expect(result.availableSize).toBe(0)
+		})
+
+		it("should handle zero total tokens without division by zero errors", () => {
+			const result = calculateTokenDistribution(0, 0, 0)
+
+			expect(result.currentPercent).toBe(0)
+			expect(result.reservedPercent).toBe(0)
+			expect(result.availablePercent).toBe(0)
+			expect(result.reservedForOutput).toBe(0)
+			expect(result.availableSize).toBe(0)
+		})
+	})
+})
diff --git a/webview-ui/src/utils/model-utils.ts b/webview-ui/src/utils/model-utils.ts
index c853bb79503..8380062eef8 100644
--- a/webview-ui/src/utils/model-utils.ts
+++ b/webview-ui/src/utils/model-utils.ts
@@ -2,6 +2,11 @@
  * Utility functions for working with language models and tokens
  */
 
+/**
+ * Default maximum tokens for thinking-capable models when no specific value is provided
+ */
+export const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
+
 /**
  * Model information interface with properties used in token calculations
  */
@@ -70,7 +75,7 @@ export const getMaxTokensForModel = (
 	apiConfig: ApiConfig | undefined,
 ): number | undefined => {
 	if (modelInfo?.thinking) {
-		return apiConfig?.modelMaxTokens || modelInfo?.maxTokens
+		return apiConfig?.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
 	}
 	return modelInfo?.maxTokens
 }
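
For reference, here is a minimal, self-contained sketch of the behavior change (not part of the patch). The function body mirrors the patched model-utils.ts; the ModelInfo and ApiConfig interfaces are trimmed to just the fields the function reads, and the example values follow the updated unit tests.

// Illustrative sketch of the patched fallback logic; trimmed types, not the real interfaces.
const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384

interface ModelInfo {
	thinking?: boolean
	maxTokens?: number
}

interface ApiConfig {
	modelMaxTokens?: number
}

const getMaxTokensForModel = (
	modelInfo: ModelInfo | undefined,
	apiConfig: ApiConfig | undefined,
): number | undefined => {
	if (modelInfo?.thinking) {
		// After this patch: fall back to the shared default, not modelInfo.maxTokens
		return apiConfig?.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
	}
	return modelInfo?.maxTokens
}

// Thinking model with no user-configured token budget:
// before the patch this returned 2048 (modelInfo.maxTokens); now it returns 16_384
console.log(getMaxTokensForModel({ thinking: true, maxTokens: 2048 }, {}))

// Non-thinking models are unaffected: modelMaxTokens is ignored
console.log(getMaxTokensForModel({ thinking: false, maxTokens: 8000 }, { modelMaxTokens: 4000 }))
// => 8000 in both versions

The net effect is that a thinking model whose provider metadata advertises a small maxTokens no longer has its reasoning budget silently capped by that value; the user's explicit modelMaxTokens still wins when set.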