@@ -242,10 +242,10 @@ describe("OpenAiHandler", () => {
 			expect(callArgs.max_completion_tokens).toBeUndefined()
 		})
 
-		it("should not include max_tokens when includeMaxTokens is undefined", async () => {
+		it("should include max_completion_tokens when includeMaxTokens is undefined (default behavior)", async () => {
 			const optionsWithUndefinedMaxTokens: ApiHandlerOptions = {
 				...mockOptions,
-				// includeMaxTokens is not set, should not include max_tokens
+				// includeMaxTokens is not set, should default to including max_completion_tokens
 				openAiCustomModelInfo: {
 					contextWindow: 128_000,
 					maxTokens: 4096,
@@ -257,10 +257,10 @@ describe("OpenAiHandler", () => {
 			// Consume the stream to trigger the API call
 			for await (const _chunk of stream) {
 			}
-			// Assert the mockCreate was called without max_tokens
+			// Assert the mockCreate was called with max_completion_tokens (default behavior)
 			expect(mockCreate).toHaveBeenCalled()
 			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs.max_completion_tokens).toBeUndefined()
+			expect(callArgs.max_completion_tokens).toBe(4096)
 		})
 
 		it("should use user-configured modelMaxTokens instead of model default maxTokens", async () => {
@@ -306,6 +306,54 @@ describe("OpenAiHandler", () => {
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.max_completion_tokens).toBe(4096)
 		})
+
+		it("should include max_completion_tokens by default for OpenAI compatible providers", async () => {
+			const optionsForCompatibleProvider: ApiHandlerOptions = {
+				...mockOptions,
+				// includeMaxTokens is not set, simulating OpenAI compatible provider usage
+				openAiBaseUrl: "https://api.koboldcpp.example.com/v1",
+				openAiCustomModelInfo: {
+					contextWindow: 32_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+				},
+			}
+			const compatibleHandler = new OpenAiHandler(optionsForCompatibleProvider)
+			const stream = compatibleHandler.createMessage(systemPrompt, messages)
+
+			const chunks = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify max_completion_tokens is included by default
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).toHaveProperty("max_completion_tokens", 4096)
+		})
+
+		it("should respect includeMaxTokens=false even for OpenAI compatible providers", async () => {
+			const optionsWithExplicitFalse: ApiHandlerOptions = {
+				...mockOptions,
+				includeMaxTokens: false, // Explicitly set to false
+				openAiBaseUrl: "https://api.koboldcpp.example.com/v1",
+				openAiCustomModelInfo: {
+					contextWindow: 32_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithExplicitFalse = new OpenAiHandler(optionsWithExplicitFalse)
+			const stream = handlerWithExplicitFalse.createMessage(systemPrompt, messages)
+
+			const chunks = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify max_completion_tokens is NOT included when explicitly set to false
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("max_completion_tokens")
+		})
 	})
 
 	describe("error handling", () => {
@@ -402,6 +450,11 @@ describe("OpenAiHandler", () => {
 			openAiBaseUrl: "https://test.services.ai.azure.com",
 			openAiModelId: "deepseek-v3",
 			azureApiVersion: "2024-05-01-preview",
+			openAiCustomModelInfo: {
+				contextWindow: 128_000,
+				maxTokens: 4096,
+				supportsPromptCache: false,
+			},
 		}
 
 		it("should initialize with Azure AI Inference Service configuration", () => {
@@ -442,13 +495,14 @@ describe("OpenAiHandler", () => {
 					stream: true,
 					stream_options: { include_usage: true },
 					temperature: 0,
+					max_completion_tokens: 4096,
 				},
 				{ path: "/models/chat/completions" },
 			)
 
-			// Verify max_tokens is NOT included when includeMaxTokens is not set
+			// Verify max_completion_tokens IS included when includeMaxTokens is not set (default behavior)
 			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs).not.toHaveProperty("max_completion_tokens")
+			expect(callArgs).toHaveProperty("max_completion_tokens")
 		})
 
 		it("should handle non-streaming responses with Azure AI Inference Service", async () => {
@@ -488,13 +542,14 @@ describe("OpenAiHandler", () => {
 						{ role: "user", content: systemPrompt },
 						{ role: "user", content: "Hello!" },
 					],
+					max_completion_tokens: 4096,
 				},
 				{ path: "/models/chat/completions" },
 			)
 
-			// Verify max_tokens is NOT included when includeMaxTokens is not set
+			// Verify max_completion_tokens IS included when includeMaxTokens is not set (default behavior)
 			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs).not.toHaveProperty("max_completion_tokens")
+			expect(callArgs).toHaveProperty("max_completion_tokens")
 		})
 
 		it("should handle completePrompt with Azure AI Inference Service", async () => {
@@ -505,13 +560,14 @@ describe("OpenAiHandler", () => {
 				{
 					model: azureOptions.openAiModelId,
 					messages: [{ role: "user", content: "Test prompt" }],
+					max_completion_tokens: 4096,
 				},
 				{ path: "/models/chat/completions" },
 			)
 
-			// Verify max_tokens is NOT included when includeMaxTokens is not set
+			// Verify max_completion_tokens IS included when includeMaxTokens is not set (default behavior)
 			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs).not.toHaveProperty("max_completion_tokens")
+			expect(callArgs).toHaveProperty("max_completion_tokens")
 		})
 	})
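
Taken together, the updated tests pin down one behavior change: an undefined `includeMaxTokens` now defaults to sending `max_completion_tokens` (taken from the model info's `maxTokens`), and only an explicit `includeMaxTokens: false` suppresses it. This is also why the Azure fixture gains `openAiCustomModelInfo`: the default path needs a `maxTokens` value to emit, so the expected Azure payloads now carry `max_completion_tokens: 4096`. A minimal sketch of the handler logic these tests appear to exercise, assuming request params are assembled in one place (`buildRequestParams` and the `HandlerOptions`/`ModelInfo` shapes are hypothetical names for illustration; only `includeMaxTokens`, `modelMaxTokens`, `openAiCustomModelInfo.maxTokens`, and the `max_completion_tokens` field appear in the diff itself):

interface ModelInfo {
	contextWindow: number
	maxTokens?: number
	supportsPromptCache: boolean
}

interface HandlerOptions {
	includeMaxTokens?: boolean
	modelMaxTokens?: number
	openAiCustomModelInfo?: ModelInfo
}

// Hypothetical assembly point for the OpenAI request body.
function buildRequestParams(options: HandlerOptions, base: Record<string, unknown>): Record<string, unknown> {
	const params = { ...base }
	// Key behavior under test: only an explicit `includeMaxTokens: false`
	// suppresses the cap; undefined falls through to the default (include it).
	if (options.includeMaxTokens !== false) {
		// A user-configured modelMaxTokens wins over the model's default maxTokens,
		// matching the "should use user-configured modelMaxTokens" test above.
		const maxTokens = options.modelMaxTokens ?? options.openAiCustomModelInfo?.maxTokens
		if (maxTokens) {
			params.max_completion_tokens = maxTokens
		}
	}
	return params
}

Under this sketch, the two new OpenAI-compatible-provider tests are the two branches of the same conditional: undefined yields `max_completion_tokens: 4096`, explicit false omits the property entirely.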
517573
0 commit comments