
Commit 93de9fa

fix: limit GPT-5 models max output tokens to 10k to prevent context overflow
- Added special handling for GPT-5 models in getModelMaxOutputTokens()
- Limits max output to 10k tokens as recommended in cline/cline#5474 (comment)
- Prevents context window overflow when input approaches 272k token limit
- Added comprehensive tests for GPT-5 token limiting behavior

Fixes #6856
1 parent bd39fe6 commit 93de9fa

File tree

src/shared/__tests__/api.spec.ts
src/shared/api.ts

2 files changed: +126 -0 lines changed

src/shared/__tests__/api.spec.ts

Lines changed: 115 additions & 0 deletions
@@ -217,6 +217,121 @@ describe("getModelMaxOutputTokens", () => {
 
 		expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
 	})
+
+	describe("GPT-5 models token limit", () => {
+		test("should limit GPT-5 models to 10k max output tokens", () => {
+			const gpt5Model: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-2025-08-07",
+				model: gpt5Model,
+				settings: {},
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should limit GPT-5-mini models to 10k max output tokens", () => {
+			const gpt5MiniModel: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-mini-2025-08-07",
+				model: gpt5MiniModel,
+				settings: {},
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should limit GPT-5-nano models to 10k max output tokens", () => {
+			const gpt5NanoModel: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-nano-2025-08-07",
+				model: gpt5NanoModel,
+				settings: {},
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should respect user override for GPT-5 models but cap at 10k", () => {
+			const gpt5Model: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			// User tries to set 15k, should be capped at 10k
+			const settings: ProviderSettings = {
+				modelMaxTokens: 15_000,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-2025-08-07",
+				model: gpt5Model,
+				settings,
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should allow user to set lower than 10k for GPT-5 models", () => {
+			const gpt5Model: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			// User sets 5k, should be respected
+			const settings: ProviderSettings = {
+				modelMaxTokens: 5_000,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-2025-08-07",
+				model: gpt5Model,
+				settings,
+				format: "openai",
+			})
+
+			expect(result).toBe(5_000)
+		})
+
+		test("should not affect non-GPT-5 models", () => {
+			const gpt4Model: ModelInfo = {
+				contextWindow: 128_000,
+				maxTokens: 16_384,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-4o",
+				model: gpt4Model,
+				settings: {},
+				format: "openai",
+			})
+
+			// Should use model's maxTokens since it's within 20% of context window
+			expect(result).toBe(16_384)
+		})
+	})
 })
 
 describe("shouldUseReasoningBudget", () => {

src/shared/api.ts

Lines changed: 11 additions & 0 deletions
@@ -87,6 +87,17 @@ export const getModelMaxOutputTokens = ({
 		return settings.claudeCodeMaxOutputTokens || CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
 	}
 
+	// Special handling for GPT-5 models to prevent context window overflow
+	// Limit max output to 10k tokens as per https://github.com/cline/cline/issues/5474#issuecomment-3172109387
+	if (modelId.startsWith("gpt-5")) {
+		// Allow user override via settings, but cap at 10k
+		const userMaxTokens = settings?.modelMaxTokens
+		if (userMaxTokens) {
+			return Math.min(userMaxTokens, 10000)
+		}
+		return 10000
+	}
+
 	if (shouldUseReasoningBudget({ model, settings })) {
 		return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
 	}
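
For context on why a 10k cap helps: a rough sketch of the overflow arithmetic behind the commit message, using the 400k contextWindow and 128k maxTokens from the model definitions in the tests above (the 272k input figure is from the commit message; the breakdown itself is an assumption based on the linked issue, not code from this diff):

// Assumed illustration of the headroom arithmetic (not part of this diff).
const contextWindow = 400_000 // GPT-5 context window used in the tests
const advertisedMaxOutput = 128_000 // maxTokens from the model definition
const inputTokens = 272_000 // input approaching the documented limit

// Reserving the full advertised output leaves no headroom at all:
console.log(contextWindow - inputTokens - advertisedMaxOutput) // 0

// Capping output at 10k restores roughly 118k tokens of slack:
console.log(contextWindow - inputTokens - 10_000) // 118000

One design note: the cap keys off modelId.startsWith("gpt-5"), so it also catches dated variants such as gpt-5-mini-2025-08-07, which is exactly what the prefix-match tests exercise.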

0 commit comments
