@@ -263,14 +263,14 @@ describe("OpenAiHandler", () => {
263263 expect ( callArgs . max_completion_tokens ) . toBeUndefined ( )
264264 } )
265265
266- it ( "should use user-configured modelMaxTokens instead of model default maxTokens " , async ( ) => {
266+ it ( "should use user-configured modelMaxTokens but cap it to model's max capability " , async ( ) => {
267267 const optionsWithUserMaxTokens : ApiHandlerOptions = {
268268 ...mockOptions ,
269269 includeMaxTokens : true ,
270- modelMaxTokens : 32000 , // User-configured value
270+ modelMaxTokens : 32000 , // User tries to set higher than model supports
271271 openAiCustomModelInfo : {
272272 contextWindow : 128_000 ,
273- maxTokens : 4096 , // Model's default value (should not be used)
273+ maxTokens : 4096 , // Model's actual max capability
274274 supportsPromptCache : false ,
275275 } ,
276276 }
@@ -279,10 +279,32 @@ describe("OpenAiHandler", () => {
279279 // Consume the stream to trigger the API call
280280 for await ( const _chunk of stream ) {
281281 }
282- // Assert the mockCreate was called with user-configured modelMaxTokens (32000 ), not model default maxTokens (4096 )
282+ // Assert the mockCreate was called with the model's max capability (4096 ), not the user's request (32000 )
283283 expect ( mockCreate ) . toHaveBeenCalled ( )
284284 const callArgs = mockCreate . mock . calls [ 0 ] [ 0 ]
285- expect ( callArgs . max_completion_tokens ) . toBe ( 32000 )
285+ expect ( callArgs . max_completion_tokens ) . toBe ( 4096 )
286+ } )
287+
288+ it ( "should use user-configured modelMaxTokens when it's less than model's max" , async ( ) => {
289+ const optionsWithLowerUserMaxTokens : ApiHandlerOptions = {
290+ ...mockOptions ,
291+ includeMaxTokens : true ,
292+ modelMaxTokens : 2000 , // User sets lower than model's max
293+ openAiCustomModelInfo : {
294+ contextWindow : 128_000 ,
295+ maxTokens : 4096 , // Model's max capability
296+ supportsPromptCache : false ,
297+ } ,
298+ }
299+ const handlerWithLowerMaxTokens = new OpenAiHandler ( optionsWithLowerUserMaxTokens )
300+ const stream = handlerWithLowerMaxTokens . createMessage ( systemPrompt , messages )
301+ // Consume the stream to trigger the API call
302+ for await ( const _chunk of stream ) {
303+ }
304+ // Assert the mockCreate was called with user's setting (2000)
305+ expect ( mockCreate ) . toHaveBeenCalled ( )
306+ const callArgs = mockCreate . mock . calls [ 0 ] [ 0 ]
307+ expect ( callArgs . max_completion_tokens ) . toBe ( 2000 )
286308 } )
287309
288310 it ( "should fallback to model default maxTokens when user modelMaxTokens is not set" , async ( ) => {
0 commit comments