@@ -190,6 +190,95 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})
 
+	test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
+		const model: ModelInfo = {
+			contextWindow: 200_000,
+			supportsPromptCache: false,
+			maxTokens: 128_000, // 64% of context window, normally would be capped
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "openai",
+		}
+
+		// Test various GPT-5 model IDs
+		const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]
+
+		gpt5ModelIds.forEach((modelId) => {
+			const result = getModelMaxOutputTokens({
+				modelId,
+				model,
+				settings,
+				format: "openai",
+			})
+			// Should use full 128k tokens, not capped to 20% (40k)
+			expect(result).toBe(128_000)
+		})
+	})
+
+	test("should still apply 20% cap to non-GPT-5 models", () => {
+		const model: ModelInfo = {
+			contextWindow: 200_000,
+			supportsPromptCache: false,
+			maxTokens: 128_000, // 64% of context window, should be capped
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "openai",
+		}
+
+		// Test non-GPT-5 model IDs
+		const nonGpt5ModelIds = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "claude-3-5-sonnet", "gemini-pro"]
+
+		nonGpt5ModelIds.forEach((modelId) => {
+			const result = getModelMaxOutputTokens({
+				modelId,
+				model,
+				settings,
+				format: "openai",
+			})
+			// Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
+			expect(result).toBe(40_000)
+		})
+	})
+
+	test("should handle GPT-5 models with various max token configurations", () => {
+		const testCases = [
+			{
+				maxTokens: 128_000,
+				contextWindow: 200_000,
+				expected: 128_000, // Uses full 128k
+			},
+			{
+				maxTokens: 64_000,
+				contextWindow: 200_000,
+				expected: 64_000, // Uses configured 64k
+			},
+			{
+				maxTokens: 256_000,
+				contextWindow: 400_000,
+				expected: 256_000, // Uses full 256k even though it's 64% of context
+			},
+		]
+
+		testCases.forEach(({ maxTokens, contextWindow, expected }) => {
+			const model: ModelInfo = {
+				contextWindow,
+				supportsPromptCache: false,
+				maxTokens,
+			}
+
+			const result = getModelMaxOutputTokens({
+				modelId: "gpt-5-turbo",
+				model,
+				settings: { apiProvider: "openai" },
+				format: "openai",
+			})
+
+			expect(result).toBe(expected)
+		})
+	})
+
 	test("should return modelMaxTokens from settings when reasoning budget is required", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
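
For reference, the rule these tests pin down can be summed up in a few lines. The following is only a sketch inferred from the expectations above, not the PR's actual implementation; the helper name isGpt5ModelId, the trimmed-down ModelInfoSketch type, and the hard-coded 0.2 factor are assumptions based solely on what the tests assert.

// Sketch (assumed): a max-output-token clamp with a GPT-5 bypass, matching the tests above.
interface ModelInfoSketch {
	contextWindow: number
	supportsPromptCache: boolean
	maxTokens?: number
}

// Hypothetical helper: case-insensitive match for "gpt-5" anywhere in the model ID,
// so "GPT-5-TURBO" and "openai/gpt-5-preview" qualify while "gpt-4" and "claude-3-5-sonnet" do not.
const isGpt5ModelId = (modelId: string): boolean => modelId.toLowerCase().includes("gpt-5")

function maxOutputTokensSketch(modelId: string, model: ModelInfoSketch): number | undefined {
	if (!model.maxTokens) return undefined

	// GPT-5 models bypass the clamp and use the configured maxTokens verbatim (128_000 stays 128_000).
	if (isGpt5ModelId(modelId)) return model.maxTokens

	// Everything else is clamped to 20% of the context window:
	// 128_000 configured against a 200_000 window yields 40_000.
	return Math.min(model.maxTokens, Math.floor(model.contextWindow * 0.2))
}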