@@ -194,17 +194,18 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})

-	test("should apply 20% cap for GPT-5 models like other models", () => {
+	test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: false,
-			maxTokens: 128_000, // 64% of context window, should be capped
+			maxTokens: 128_000, // 64% of context window, normally would be capped
 		}

 		const settings: ProviderSettings = {
 			apiProvider: "openai",
 		}

+		// Test various GPT-5 model IDs
 		const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]

 		gpt5ModelIds.forEach((modelId) => {
@@ -214,8 +215,8 @@ describe("getModelMaxOutputTokens", () => {
 				settings,
 				format: "openai",
 			})
-			// Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
-			expect(result).toBe(40_000)
+			// Should use full 128k tokens, not capped to 20% (40k)
+			expect(result).toBe(128_000)
 		})
 	})

@@ -245,11 +246,23 @@ describe("getModelMaxOutputTokens", () => {
 		})
 	})

-	test("should cap GPT-5 models to min(model.maxTokens, 20% of contextWindow)", () => {
+	test("should handle GPT-5 models with various max token configurations", () => {
 		const testCases = [
-			{ maxTokens: 128_000, contextWindow: 200_000, expected: 40_000 },
-			{ maxTokens: 64_000, contextWindow: 200_000, expected: 40_000 },
-			{ maxTokens: 256_000, contextWindow: 400_000, expected: 80_000 },
+			{
+				maxTokens: 128_000,
+				contextWindow: 200_000,
+				expected: 128_000, // Uses full 128k
+			},
+			{
+				maxTokens: 64_000,
+				contextWindow: 200_000,
+				expected: 64_000, // Uses configured 64k
+			},
+			{
+				maxTokens: 256_000,
+				contextWindow: 400_000,
+				expected: 256_000, // Uses full 256k even though it's 64% of context
+			},
 		]

 		testCases.forEach(({ maxTokens, contextWindow, expected }) => {
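For reference, here is a minimal TypeScript sketch of the capping logic these tests imply, inferred only from the assertions above: other models are clamped to 20% of the context window, while GPT-5 models keep their configured `maxTokens` exactly. The function name, the case-insensitive `"gpt-5"` substring match, and the flooring are assumptions made for illustration, not the actual `getModelMaxOutputTokens` implementation.

```typescript
// Sketch only — reconstructed from the test expectations, not the real code.
interface ModelInfo {
	contextWindow: number
	supportsPromptCache: boolean
	maxTokens?: number
}

function sketchMaxOutputTokens(modelId: string, model: ModelInfo): number {
	// Assumption: a case-insensitive substring match covers every ID in the
	// tests ("gpt-5", "GPT-5-TURBO", "openai/gpt-5-preview", "gpt-5-32k", ...).
	const isGpt5 = modelId.toLowerCase().includes("gpt-5")

	// GPT-5 models bypass the cap and use their exact configured max tokens.
	if (isGpt5 && model.maxTokens) {
		return model.maxTokens
	}

	// Everything else is capped at 20% of the context window.
	const cap = Math.floor(model.contextWindow * 0.2)
	return Math.min(model.maxTokens ?? cap, cap)
}

// Mirrors the expectations above:
// sketchMaxOutputTokens("gpt-5-turbo", { contextWindow: 200_000, supportsPromptCache: false, maxTokens: 128_000 }) // 128_000
// sketchMaxOutputTokens("gpt-4o", { contextWindow: 200_000, supportsPromptCache: false, maxTokens: 128_000 }) // 40_000
```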