diff --git a/src/core/Cline.ts b/src/core/Cline.ts
index 51fb5265d41..870db843b76 100644
--- a/src/core/Cline.ts
+++ b/src/core/Cline.ts
@@ -1124,9 +1124,12 @@ export class Cline {
 		const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads
 
+		// Default max tokens value for thinking models when no specific value is set
+		const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
+
 		const modelInfo = this.api.getModel().info
 		const maxTokens = modelInfo.thinking
-			? this.apiConfiguration.modelMaxTokens || modelInfo.maxTokens
+			? this.apiConfiguration.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
 			: modelInfo.maxTokens
 		const contextWindow = modelInfo.contextWindow
 
 		const trimmedMessages = await truncateConversationIfNeeded({
diff --git a/webview-ui/package-lock.json b/webview-ui/package-lock.json
index 2e765133f36..b52f8ab311d 100644
--- a/webview-ui/package-lock.json
+++ b/webview-ui/package-lock.json
@@ -14151,7 +14151,6 @@
       "resolved": "https://registry.npmjs.org/jest-environment-jsdom/-/jest-environment-jsdom-29.7.0.tgz",
       "integrity": "sha512-k9iQbsf9OyOfdzWH8HDmrRT0gSIcX+FLNW7IQq94tFX0gynPwqDTW0Ho6iMVNjGz/nb+l/vW3dWM2bbLLpkbXA==",
       "dev": true,
-      "license": "MIT",
       "dependencies": {
         "@jest/environment": "^29.7.0",
         "@jest/fake-timers": "^29.7.0",
diff --git a/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx b/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx
index cf8567bf232..2a55ca97229 100644
--- a/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx
+++ b/webview-ui/src/__tests__/getMaxTokensForModel.test.tsx
@@ -1,4 +1,4 @@
-import { getMaxTokensForModel } from "@/utils/model-utils"
+import { DEFAULT_THINKING_MODEL_MAX_TOKENS, getMaxTokensForModel } from "@/utils/model-utils"
 
 describe("getMaxTokensForModel utility from model-utils", () => {
 	test("should return maxTokens from modelInfo when thinking is false", () => {
@@ -29,7 +29,7 @@ describe("getMaxTokensForModel utility from model-utils", () => {
 		expect(result).toBe(4096)
 	})
 
-	test("should fallback to modelInfo.maxTokens when thinking is true but apiConfig.modelMaxTokens is not defined", () => {
+	test("should fallback to DEFAULT_THINKING_MODEL_MAX_TOKENS when thinking is true but apiConfig.modelMaxTokens is not defined", () => {
 		const modelInfo = {
 			maxTokens: 2048,
 			thinking: true,
@@ -38,7 +38,7 @@ describe("getMaxTokensForModel utility from model-utils", () => {
 		const apiConfig = {}
 
 		const result = getMaxTokensForModel(modelInfo, apiConfig)
-		expect(result).toBe(2048)
+		expect(result).toBe(DEFAULT_THINKING_MODEL_MAX_TOKENS)
 	})
 
 	test("should handle undefined inputs gracefully", () => {
diff --git a/webview-ui/src/utils/__tests__/model-utils.test.ts b/webview-ui/src/utils/__tests__/model-utils.test.ts
new file mode 100644
index 00000000000..3f667dc9618
--- /dev/null
+++ b/webview-ui/src/utils/__tests__/model-utils.test.ts
@@ -0,0 +1,134 @@
+/**
+ * @fileoverview Tests for token and model utility functions
+ */
+
+import {
+	getMaxTokensForModel,
+	calculateTokenDistribution,
+	ModelInfo,
+	ApiConfig,
+	DEFAULT_THINKING_MODEL_MAX_TOKENS,
+} from "../model-utils"
+
+describe("Model utility functions", () => {
+	describe("getMaxTokensForModel", () => {
+		/**
+		 * Testing the specific fix in commit cc79178f:
+		 * For thinking models, use apiConfig.modelMaxTokens if available,
+		 * otherwise fall back to 16_384 (not modelInfo.maxTokens)
+		 */
+
+		it("should return apiConfig.modelMaxTokens for thinking models when provided", () => {
+			const modelInfo: ModelInfo = {
+				thinking: true,
+				maxTokens: 8000,
+			}
+
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBe(4000)
+		})
+
+		it("should return 16_384 for thinking models when modelMaxTokens not provided", () => {
+			const modelInfo: ModelInfo = {
+				thinking: true,
+				maxTokens: 8000,
+			}
+
+			const apiConfig: ApiConfig = {}
+
+			// This tests the specific fix: now using DEFAULT_THINKING_MODEL_MAX_TOKENS instead of falling back to modelInfo.maxTokens
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBe(DEFAULT_THINKING_MODEL_MAX_TOKENS)
+		})
+
+		it("should return 16_384 for thinking models when apiConfig is undefined", () => {
+			const modelInfo: ModelInfo = {
+				thinking: true,
+				maxTokens: 8000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, undefined)).toBe(DEFAULT_THINKING_MODEL_MAX_TOKENS)
+		})
+
+		it("should return modelInfo.maxTokens for non-thinking models", () => {
+			const modelInfo: ModelInfo = {
+				thinking: false,
+				maxTokens: 8000,
+			}
+
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBe(8000)
+		})
+
+		it("should return undefined for non-thinking models with undefined maxTokens", () => {
+			const modelInfo: ModelInfo = {
+				thinking: false,
+			}
+
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(modelInfo, apiConfig)).toBeUndefined()
+		})
+
+		it("should return undefined when modelInfo is undefined", () => {
+			const apiConfig: ApiConfig = {
+				modelMaxTokens: 4000,
+			}
+
+			expect(getMaxTokensForModel(undefined, apiConfig)).toBeUndefined()
+		})
+	})
+
+	describe("calculateTokenDistribution", () => {
+		it("should calculate token distribution correctly", () => {
+			const contextWindow = 10000
+			const contextTokens = 5000
+			const maxTokens = 2000
+
+			const result = calculateTokenDistribution(contextWindow, contextTokens, maxTokens)
+
+			expect(result.reservedForOutput).toBe(maxTokens)
+			expect(result.availableSize).toBe(3000) // 10000 - 5000 - 2000
+
+			// Percentages should sum to 100%
+			expect(Math.round(result.currentPercent + result.reservedPercent + result.availablePercent)).toBe(100)
+		})
+
+		it("should default to 20% of context window when maxTokens not provided", () => {
+			const contextWindow = 10000
+			const contextTokens = 5000
+
+			const result = calculateTokenDistribution(contextWindow, contextTokens)
+
+			expect(result.reservedForOutput).toBe(2000) // 20% of 10000
+			expect(result.availableSize).toBe(3000) // 10000 - 5000 - 2000
+		})
+
+		it("should handle negative or zero inputs by using positive fallbacks", () => {
+			const result = calculateTokenDistribution(-1000, -500)
+
+			expect(result.currentPercent).toBe(0)
+			expect(result.reservedPercent).toBe(0)
+			expect(result.availablePercent).toBe(0)
+			expect(result.reservedForOutput).toBe(0) // With negative inputs, both context window and tokens become 0, so 20% of 0 is 0
+			expect(result.availableSize).toBe(0)
+		})
+
+		it("should handle zero total tokens without division by zero errors", () => {
+			const result = calculateTokenDistribution(0, 0, 0)
+
+			expect(result.currentPercent).toBe(0)
+			expect(result.reservedPercent).toBe(0)
+			expect(result.availablePercent).toBe(0)
+			expect(result.reservedForOutput).toBe(0)
+			expect(result.availableSize).toBe(0)
+		})
+	})
+})
diff --git a/webview-ui/src/utils/model-utils.ts b/webview-ui/src/utils/model-utils.ts
index c853bb79503..8380062eef8 100644
--- a/webview-ui/src/utils/model-utils.ts
+++ b/webview-ui/src/utils/model-utils.ts
@@ -2,6 +2,11 @@
  * Utility functions for working with language models and tokens
  */
 
+/**
+ * Default maximum tokens for thinking-capable models when no specific value is provided
+ */
+export const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
+
 /**
  * Model information interface with properties used in token calculations
  */
@@ -70,7 +75,7 @@ export const getMaxTokensForModel = (
 	apiConfig: ApiConfig | undefined,
 ): number | undefined => {
 	if (modelInfo?.thinking) {
-		return apiConfig?.modelMaxTokens || modelInfo?.maxTokens
+		return apiConfig?.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
 	}
 	return modelInfo?.maxTokens
 }
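
For reference, here is a minimal, self-contained sketch of the behavior change (not part of the patch). The function body mirrors the patched model-utils.ts; the ModelInfo and ApiConfig interfaces are trimmed to just the fields the function reads, and the example values follow the updated unit tests.

// Illustrative sketch of the patched fallback logic; trimmed types, not the real interfaces.
const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384

interface ModelInfo {
	thinking?: boolean
	maxTokens?: number
}

interface ApiConfig {
	modelMaxTokens?: number
}

const getMaxTokensForModel = (
	modelInfo: ModelInfo | undefined,
	apiConfig: ApiConfig | undefined,
): number | undefined => {
	if (modelInfo?.thinking) {
		// After this patch: fall back to the shared default, not modelInfo.maxTokens
		return apiConfig?.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
	}
	return modelInfo?.maxTokens
}

// Thinking model with no user-configured token budget:
// before the patch this returned 2048 (modelInfo.maxTokens); now it returns 16_384
console.log(getMaxTokensForModel({ thinking: true, maxTokens: 2048 }, {}))

// Non-thinking models are unaffected: modelMaxTokens is ignored
console.log(getMaxTokensForModel({ thinking: false, maxTokens: 8000 }, { modelMaxTokens: 4000 }))
// => 8000 in both versions

The net effect is that a thinking model whose provider metadata advertises a small maxTokens no longer has its reasoning budget silently capped by that value; the user's explicit modelMaxTokens still wins when set.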