Skip to content

Commit 9b3baaa

Browse files
committed
refactor: improve GPT-5 token limit implementation
- Extract GPT5_MAX_OUTPUT_TOKENS as a named constant (10,000)
- Improve model detection with a more specific regex pattern
- Add comprehensive documentation explaining the context window overflow issue
- Add tests for date-suffixed models and invalid model name patterns
- Update all test assertions to use the named constant
1 parent 93de9fa commit 9b3baaa

File tree

2 files changed

+76
-13
lines changed

2 files changed

+76
-13
lines changed

src/shared/__tests__/api.spec.ts

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
import { describe, test, expect } from "vitest"
2-
import { getModelMaxOutputTokens, shouldUseReasoningBudget, shouldUseReasoningEffort } from "../api"
2+
import {
3+
getModelMaxOutputTokens,
4+
shouldUseReasoningBudget,
5+
shouldUseReasoningEffort,
6+
GPT5_MAX_OUTPUT_TOKENS,
7+
} from "../api"
38
import type { ModelInfo, ProviderSettings } from "@roo-code/types"
49
import { CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types"
510

@@ -233,7 +238,7 @@ describe("getModelMaxOutputTokens", () => {
233238
format: "openai",
234239
})
235240

236-
expect(result).toBe(10_000)
241+
expect(result).toBe(GPT5_MAX_OUTPUT_TOKENS)
237242
})
238243

239244
test("should limit GPT-5-mini models to 10k max output tokens", () => {
@@ -250,7 +255,7 @@ describe("getModelMaxOutputTokens", () => {
250255
format: "openai",
251256
})
252257

253-
expect(result).toBe(10_000)
258+
expect(result).toBe(GPT5_MAX_OUTPUT_TOKENS)
254259
})
255260

256261
test("should limit GPT-5-nano models to 10k max output tokens", () => {
@@ -267,17 +272,17 @@ describe("getModelMaxOutputTokens", () => {
267272
format: "openai",
268273
})
269274

270-
expect(result).toBe(10_000)
275+
expect(result).toBe(GPT5_MAX_OUTPUT_TOKENS)
271276
})
272277

273-
test("should respect user override for GPT-5 models but cap at 10k", () => {
278+
test("should respect user override for GPT-5 models but cap at GPT5_MAX_OUTPUT_TOKENS", () => {
274279
const gpt5Model: ModelInfo = {
275280
contextWindow: 400_000,
276281
maxTokens: 128_000,
277282
supportsPromptCache: true,
278283
}
279284

280-
// User tries to set 15k, should be capped at 10k
285+
// User tries to set 15k, should be capped at GPT5_MAX_OUTPUT_TOKENS
281286
const settings: ProviderSettings = {
282287
modelMaxTokens: 15_000,
283288
}
@@ -289,10 +294,10 @@ describe("getModelMaxOutputTokens", () => {
289294
format: "openai",
290295
})
291296

292-
expect(result).toBe(10_000)
297+
expect(result).toBe(GPT5_MAX_OUTPUT_TOKENS)
293298
})
294299

295-
test("should allow user to set lower than 10k for GPT-5 models", () => {
300+
test("should allow user to set lower than GPT5_MAX_OUTPUT_TOKENS for GPT-5 models", () => {
296301
const gpt5Model: ModelInfo = {
297302
contextWindow: 400_000,
298303
maxTokens: 128_000,
@@ -331,6 +336,54 @@ describe("getModelMaxOutputTokens", () => {
331336
// Should use model's maxTokens since it's within 20% of context window
332337
expect(result).toBe(16_384)
333338
})
339+
340+
test("should handle GPT-5 models with date suffixes", () => {
341+
const gpt5Model: ModelInfo = {
342+
contextWindow: 400_000,
343+
maxTokens: 128_000,
344+
supportsPromptCache: true,
345+
}
346+
347+
// Test various date-suffixed GPT-5 models
348+
const modelIds = ["gpt-5-2025-08-07", "gpt-5-mini-2025-08-07", "gpt-5-nano-2025-08-07"]
349+
350+
modelIds.forEach((modelId) => {
351+
const result = getModelMaxOutputTokens({
352+
modelId,
353+
model: gpt5Model,
354+
settings: {},
355+
format: "openai",
356+
})
357+
expect(result).toBe(GPT5_MAX_OUTPUT_TOKENS)
358+
})
359+
})
360+
361+
test("should not match invalid GPT-5 model names", () => {
362+
const model: ModelInfo = {
363+
contextWindow: 128_000,
364+
maxTokens: 16_384,
365+
supportsPromptCache: true,
366+
}
367+
368+
// These should NOT be treated as GPT-5 models
369+
const invalidModelIds = [
370+
"gpt-5-turbo", // Invalid variant
371+
"gpt-50", // Different number
372+
"gpt-5-", // Incomplete
373+
"gpt-5-mini-turbo", // Invalid variant combination
374+
]
375+
376+
invalidModelIds.forEach((modelId) => {
377+
const result = getModelMaxOutputTokens({
378+
modelId,
379+
model,
380+
settings: {},
381+
format: "openai",
382+
})
383+
// Should use model's maxTokens since it's within 20% of context window
384+
expect(result).toBe(16_384)
385+
})
386+
})
334387
})
335388
})
336389

src/shared/api.ts

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
6969
export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
7070
export const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128
7171

72+
// GPT-5 specific constants
73+
/**
74+
* Maximum output tokens for GPT-5 models to prevent context window overflow.
75+
* When input approaches the 272k limit, the model's 128k max output can exceed
76+
* the total 400k context window, causing API errors.
77+
* @see https://github.com/cline/cline/issues/5474#issuecomment-3172109387
78+
*/
79+
export const GPT5_MAX_OUTPUT_TOKENS = 10_000
80+
7281
// Max Tokens
7382

7483
export const getModelMaxOutputTokens = ({
@@ -88,14 +97,15 @@ export const getModelMaxOutputTokens = ({
8897
}
8998

9099
// Special handling for GPT-5 models to prevent context window overflow
91-
// Limit max output to 10k tokens as per https://github.com/cline/cline/issues/5474#issuecomment-3172109387
92-
if (modelId.startsWith("gpt-5")) {
93-
// Allow user override via settings, but cap at 10k
100+
// GPT-5 models include: gpt-5, gpt-5-mini, gpt-5-nano, and dated variants
101+
const isGpt5Model = /^gpt-5(-mini|-nano)?(-\d{4}-\d{2}-\d{2})?$/i.test(modelId)
102+
if (isGpt5Model) {
103+
// Allow user override via settings, but cap at GPT5_MAX_OUTPUT_TOKENS
94104
const userMaxTokens = settings?.modelMaxTokens
95105
if (userMaxTokens) {
96-
return Math.min(userMaxTokens, 10000)
106+
return Math.min(userMaxTokens, GPT5_MAX_OUTPUT_TOKENS)
97107
}
98-
return 10000
108+
return GPT5_MAX_OUTPUT_TOKENS
99109
}
100110

101111
if (shouldUseReasoningBudget({ model, settings })) {

0 commit comments

Comments (0)