@@ -217,6 +217,121 @@ describe("getModelMaxOutputTokens", () => {
 
 		expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
 	})
+
+	describe("GPT-5 models token limit", () => {
+		test("should limit GPT-5 models to 10k max output tokens", () => {
+			const gpt5Model: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-2025-08-07",
+				model: gpt5Model,
+				settings: {},
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should limit GPT-5-mini models to 10k max output tokens", () => {
+			const gpt5MiniModel: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-mini-2025-08-07",
+				model: gpt5MiniModel,
+				settings: {},
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should limit GPT-5-nano models to 10k max output tokens", () => {
+			const gpt5NanoModel: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-nano-2025-08-07",
+				model: gpt5NanoModel,
+				settings: {},
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should respect user override for GPT-5 models but cap at 10k", () => {
+			const gpt5Model: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			// User tries to set 15k, should be capped at 10k
+			const settings: ProviderSettings = {
+				modelMaxTokens: 15_000,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-2025-08-07",
+				model: gpt5Model,
+				settings,
+				format: "openai",
+			})
+
+			expect(result).toBe(10_000)
+		})
+
+		test("should allow user to set lower than 10k for GPT-5 models", () => {
+			const gpt5Model: ModelInfo = {
+				contextWindow: 400_000,
+				maxTokens: 128_000,
+				supportsPromptCache: true,
+			}
+
+			// User sets 5k, should be respected
+			const settings: ProviderSettings = {
+				modelMaxTokens: 5_000,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-2025-08-07",
+				model: gpt5Model,
+				settings,
+				format: "openai",
+			})
+
+			expect(result).toBe(5_000)
+		})
+
+		test("should not affect non-GPT-5 models", () => {
+			const gpt4Model: ModelInfo = {
+				contextWindow: 128_000,
+				maxTokens: 16_384,
+				supportsPromptCache: true,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-4o",
+				model: gpt4Model,
+				settings: {},
+				format: "openai",
+			})
+
+			// Should use the model's maxTokens since it's within 20% of the context window
+			expect(result).toBe(16_384)
+		})
+	})
 })
 
 describe("shouldUseReasoningBudget", () => {
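
Taken together, the new tests pin down a simple clamp: GPT-5-family model ids are capped at 10,000 output tokens, user overrides below the cap are honored, and other models pass through untouched. As a minimal sketch of that behavior (not the PR's actual code; the GPT5_MAX_OUTPUT_TOKENS constant and isGpt5 helper are assumptions for illustration):

// Sketch of the clamping behavior implied by the tests above.
// The constant name and helper are hypothetical, not from the PR.
const GPT5_MAX_OUTPUT_TOKENS = 10_000

// Hypothetical helper: detect GPT-5 family ids like "gpt-5-mini-2025-08-07".
const isGpt5 = (modelId: string): boolean => modelId.startsWith("gpt-5")

function clampGpt5MaxTokens(modelId: string, requestedMaxTokens: number): number {
	// Non-GPT-5 models are unaffected ("should not affect non-GPT-5 models").
	if (!isGpt5(modelId)) return requestedMaxTokens
	// Values below the cap are respected; anything above is clamped to 10k.
	return Math.min(requestedMaxTokens, GPT5_MAX_OUTPUT_TOKENS)
}

// Under these assumptions, the sketch matches the expectations above:
// clampGpt5MaxTokens("gpt-5-2025-08-07", 15_000) === 10_000
// clampGpt5MaxTokens("gpt-5-2025-08-07", 5_000) === 5_000
// clampGpt5MaxTokens("gpt-4o", 16_384) === 16_384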