
Commit 507a600

Revert "Clamp GPT-5 max output tokens to 20% of context window" (#8582)
1 parent: b011b63 · commit: 507a600

File tree

2 files changed: +31 -8 lines

src/shared/__tests__/api.spec.ts

Lines changed: 21 additions & 8 deletions
@@ -194,17 +194,18 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})

-	test("should apply 20% cap for GPT-5 models like other models", () => {
+	test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: false,
-			maxTokens: 128_000, // 64% of context window, should be capped
+			maxTokens: 128_000, // 64% of context window, normally would be capped
 		}

 		const settings: ProviderSettings = {
 			apiProvider: "openai",
 		}

+		// Test various GPT-5 model IDs
 		const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]

 		gpt5ModelIds.forEach((modelId) => {
@@ -214,8 +215,8 @@ describe("getModelMaxOutputTokens", () => {
 				settings,
 				format: "openai",
 			})
-			// Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
-			expect(result).toBe(40_000)
+			// Should use full 128k tokens, not capped to 20% (40k)
+			expect(result).toBe(128_000)
 		})
 	})

@@ -245,11 +246,23 @@ describe("getModelMaxOutputTokens", () => {
 		})
 	})

-	test("should cap GPT-5 models to min(model.maxTokens, 20% of contextWindow)", () => {
+	test("should handle GPT-5 models with various max token configurations", () => {
 		const testCases = [
-			{ maxTokens: 128_000, contextWindow: 200_000, expected: 40_000 },
-			{ maxTokens: 64_000, contextWindow: 200_000, expected: 40_000 },
-			{ maxTokens: 256_000, contextWindow: 400_000, expected: 80_000 },
+			{
+				maxTokens: 128_000,
+				contextWindow: 200_000,
+				expected: 128_000, // Uses full 128k
+			},
+			{
+				maxTokens: 64_000,
+				contextWindow: 200_000,
+				expected: 64_000, // Uses configured 64k
+			},
+			{
+				maxTokens: 256_000,
+				contextWindow: 400_000,
+				expected: 256_000, // Uses full 256k even though it's 64% of context
+			},
 		]

 		testCases.forEach(({ maxTokens, contextWindow, expected }) => {
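
For orientation, the loop body patched by the second hunk presumably calls getModelMaxOutputTokens as sketched below. The full call site is not visible in this diff, so the argument shape is an assumption pieced together from the context lines (settings, format: "openai") and the function signature in src/shared/api.ts:

	// Sketch of the assumed test loop body; only some of these lines appear in the diff
	gpt5ModelIds.forEach((modelId) => {
		const result = getModelMaxOutputTokens({
			modelId,
			model, // the 128_000-token ModelInfo defined above
			settings,
			format: "openai",
		})
		// Post-revert expectation: the configured 128_000, not the 40_000 clamp
		expect(result).toBe(128_000)
	})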

src/shared/api.ts

Lines changed: 10 additions & 0 deletions
@@ -116,7 +116,17 @@ export const getModelMaxOutputTokens = ({
 	}

 	// If model has explicit maxTokens, clamp it to 20% of the context window
+	// Exception: GPT-5 models should use their exact configured max output tokens
 	if (model.maxTokens) {
+		// Check if this is a GPT-5 model (case-insensitive)
+		const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
+
+		// GPT-5 models bypass the 20% cap and use their full configured max tokens
+		if (isGpt5Model) {
+			return model.maxTokens
+		}
+
+		// All other models are clamped to 20% of context window
 		return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
 	}

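Taken together, the restored behavior reduces to the small rule sketched below. This is a self-contained illustration rather than the actual export: the real function takes full ModelInfo and ProviderSettings objects, and the non-GPT-5 model ID is made up for contrast:

	// Minimal sketch of the post-revert rule, assuming a simplified signature
	function sketchMaxOutputTokens(modelId: string, maxTokens: number, contextWindow: number): number {
		// GPT-5 models (matched case-insensitively) keep their configured maximum
		if (modelId.toLowerCase().includes("gpt-5")) {
			return maxTokens
		}
		// All other models are clamped to 20% of the context window
		return Math.min(maxTokens, Math.ceil(contextWindow * 0.2))
	}

	sketchMaxOutputTokens("gpt-5-turbo", 128_000, 200_000) // 128_000: bypasses the cap
	sketchMaxOutputTokens("some-other-model", 128_000, 200_000) // 40_000: Math.ceil(200_000 * 0.2)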