Commit 5e07bc4

feat: exclude GPT-5 models from 20% context window output token cap (#6963)
Co-authored-by: Roo Code <[email protected]>
1 parent 4eb4075 commit 5e07bc4

File tree

2 files changed: +99 −0 lines

    src/shared/__tests__/api.spec.ts
    src/shared/api.ts

src/shared/__tests__/api.spec.ts

Lines changed: 89 additions & 0 deletions
@@ -190,6 +190,95 @@ describe("getModelMaxOutputTokens", () => {
         expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
     })

+    test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
+        const model: ModelInfo = {
+            contextWindow: 200_000,
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window, normally would be capped
+        }
+
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+        }
+
+        // Test various GPT-5 model IDs
+        const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]
+
+        gpt5ModelIds.forEach((modelId) => {
+            const result = getModelMaxOutputTokens({
+                modelId,
+                model,
+                settings,
+                format: "openai",
+            })
+            // Should use full 128k tokens, not capped to 20% (40k)
+            expect(result).toBe(128_000)
+        })
+    })
+
+    test("should still apply 20% cap to non-GPT-5 models", () => {
+        const model: ModelInfo = {
+            contextWindow: 200_000,
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window, should be capped
+        }
+
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+        }
+
+        // Test non-GPT-5 model IDs
+        const nonGpt5ModelIds = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "claude-3-5-sonnet", "gemini-pro"]
+
+        nonGpt5ModelIds.forEach((modelId) => {
+            const result = getModelMaxOutputTokens({
+                modelId,
+                model,
+                settings,
+                format: "openai",
+            })
+            // Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
+            expect(result).toBe(40_000)
+        })
+    })
+
+    test("should handle GPT-5 models with various max token configurations", () => {
+        const testCases = [
+            {
+                maxTokens: 128_000,
+                contextWindow: 200_000,
+                expected: 128_000, // Uses full 128k
+            },
+            {
+                maxTokens: 64_000,
+                contextWindow: 200_000,
+                expected: 64_000, // Uses configured 64k
+            },
+            {
+                maxTokens: 256_000,
+                contextWindow: 400_000,
+                expected: 256_000, // Uses full 256k even though it's 64% of context
+            },
+        ]
+
+        testCases.forEach(({ maxTokens, contextWindow, expected }) => {
+            const model: ModelInfo = {
+                contextWindow,
+                supportsPromptCache: false,
+                maxTokens,
+            }
+
+            const result = getModelMaxOutputTokens({
+                modelId: "gpt-5-turbo",
+                model,
+                settings: { apiProvider: "openai" },
+                format: "openai",
+            })
+
+            expect(result).toBe(expected)
+        })
+    })
+
     test("should return modelMaxTokens from settings when reasoning budget is required", () => {
         const model: ModelInfo = {
             contextWindow: 200_000,

src/shared/api.ts

Lines changed: 10 additions & 0 deletions
@@ -107,7 +107,17 @@ export const getModelMaxOutputTokens = ({
     }

     // If model has explicit maxTokens, clamp it to 20% of the context window
+    // Exception: GPT-5 models should use their exact configured max output tokens
     if (model.maxTokens) {
+        // Check if this is a GPT-5 model (case-insensitive)
+        const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
+
+        // GPT-5 models bypass the 20% cap and use their full configured max tokens
+        if (isGpt5Model) {
+            return model.maxTokens
+        }
+
+        // All other models are clamped to 20% of context window
         return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
     }
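
For illustration, a minimal usage sketch of how a caller would observe the change, mirroring the test file above. The import path and the availability of the ModelInfo and ProviderSettings types from that module are assumptions, not confirmed by this commit:

    // Hypothetical caller; import location assumed from the file layout shown in this commit.
    import { getModelMaxOutputTokens } from "../src/shared/api"
    import type { ModelInfo, ProviderSettings } from "../src/shared/api"

    const model: ModelInfo = {
        contextWindow: 200_000,
        supportsPromptCache: false,
        maxTokens: 128_000, // 64% of the context window
    }

    const settings: ProviderSettings = { apiProvider: "openai" }

    // Model IDs containing "gpt-5" bypass the cap and keep their configured maximum.
    getModelMaxOutputTokens({ modelId: "gpt-5", model, settings, format: "openai" }) // 128_000

    // Any other model ID is still clamped to 20% of the context window.
    getModelMaxOutputTokens({ modelId: "gpt-4-turbo", model, settings, format: "openai" }) // 40_000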
