From 8acee5963d137673ee530db9a5dd9388fc03fa8c Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Fri, 3 Oct 2025 11:27:36 -0500
Subject: [PATCH] feat(shared): clamp GPT-5 max output tokens to 20% of
 context window; remove GPT-5 exception and update tests

---
 src/shared/__tests__/api.spec.ts | 29 ++++++++---------------------
 src/shared/api.ts                | 10 ----------
 2 files changed, 8 insertions(+), 31 deletions(-)

diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts
index aaeb1bf444..9dc54e42d0 100644
--- a/src/shared/__tests__/api.spec.ts
+++ b/src/shared/__tests__/api.spec.ts
@@ -194,18 +194,17 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})
 
-	test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
+	test("should apply 20% cap for GPT-5 models like other models", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: false,
-			maxTokens: 128_000, // 64% of context window, normally would be capped
+			maxTokens: 128_000, // 64% of context window, should be capped
 		}
 
 		const settings: ProviderSettings = {
 			apiProvider: "openai",
 		}
 
-		// Test various GPT-5 model IDs
 		const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]
 
 		gpt5ModelIds.forEach((modelId) => {
@@ -215,8 +214,8 @@
 				settings,
 				format: "openai",
 			})
-			// Should use full 128k tokens, not capped to 20% (40k)
-			expect(result).toBe(128_000)
+			// Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
+			expect(result).toBe(40_000)
 		})
 	})
 
@@ -246,23 +245,11 @@
 		})
 	})
 
-	test("should handle GPT-5 models with various max token configurations", () => {
+	test("should cap GPT-5 models to min(model.maxTokens, 20% of contextWindow)", () => {
 		const testCases = [
-			{
-				maxTokens: 128_000,
-				contextWindow: 200_000,
-				expected: 128_000, // Uses full 128k
-			},
-			{
-				maxTokens: 64_000,
-				contextWindow: 200_000,
-				expected: 64_000, // Uses configured 64k
-			},
-			{
-				maxTokens: 256_000,
-				contextWindow: 400_000,
-				expected: 256_000, // Uses full 256k even though it's 64% of context
-			},
+			{ maxTokens: 128_000, contextWindow: 200_000, expected: 40_000 },
+			{ maxTokens: 64_000, contextWindow: 200_000, expected: 40_000 },
+			{ maxTokens: 256_000, contextWindow: 400_000, expected: 80_000 },
 		]
 
 		testCases.forEach(({ maxTokens, contextWindow, expected }) => {
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 79001cb0ad..eefbee2425 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -116,17 +116,7 @@ export const getModelMaxOutputTokens = ({
 	}
 
 	// If model has explicit maxTokens, clamp it to 20% of the context window
-	// Exception: GPT-5 models should use their exact configured max output tokens
 	if (model.maxTokens) {
-		// Check if this is a GPT-5 model (case-insensitive)
-		const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
-
-		// GPT-5 models bypass the 20% cap and use their full configured max tokens
-		if (isGpt5Model) {
-			return model.maxTokens
-		}
-
-		// All other models are clamped to 20% of context window
 		return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
 	}
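
Note (review sketch, not part of the patch): with the exception removed, every model that declares an explicit maxTokens, GPT-5 included, is clamped to at most 20% of its context window. Below is a minimal standalone TypeScript sketch of the resulting rule, assuming only the two ModelInfo fields the branch actually reads; clampMaxOutputTokens is a hypothetical name for illustration, since the patch itself only edits the body of getModelMaxOutputTokens in src/shared/api.ts:

	// Hypothetical standalone version of the post-patch clamping rule.
	interface ClampInput {
		maxTokens?: number // model's configured max output tokens, if any
		contextWindow: number // model's total context window in tokens
	}

	function clampMaxOutputTokens({ maxTokens, contextWindow }: ClampInput): number | undefined {
		// No explicit maxTokens: the real function falls through to its other defaults.
		if (!maxTokens) return undefined
		// Math.ceil rounds the cap up when contextWindow * 0.2 is fractional,
		// so the cap never loses a token to floating-point truncation.
		return Math.min(maxTokens, Math.ceil(contextWindow * 0.2))
	}

	// These mirror the updated test expectations:
	clampMaxOutputTokens({ maxTokens: 128_000, contextWindow: 200_000 }) // 40_000
	clampMaxOutputTokens({ maxTokens: 64_000, contextWindow: 200_000 }) // 40_000
	clampMaxOutputTokens({ maxTokens: 256_000, contextWindow: 400_000 }) // 80_000
	clampMaxOutputTokens({ maxTokens: 20_000, contextWindow: 100_000 }) // 20_000, exactly at 20%

One consequence worth noting: the mixed-case IDs in gpt5ModelIds ("GPT-5", "GPT-5-TURBO") no longer affect the result, because the removed isGpt5Model check was the only place this branch looked at modelId; the surviving clamp ignores modelId entirely.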