@@ -34,6 +34,9 @@ vi.mock("../fetchers/modelCache", () => ({
3434 "GPT-5" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
3535 "gpt-5-turbo" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
3636 "gpt5-preview" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
37+ "gpt-5o" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
38+ "gpt-5.1" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
39+ "gpt-5-mini" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
3740 "gpt-4" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
3841 "claude-3-opus" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
3942 "llama-3" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
@@ -200,7 +203,16 @@ describe("LiteLLMHandler", () => {
200203 } )
201204
202205 it ( "should use max_completion_tokens for various GPT-5 model variations" , async ( ) => {
203- const gpt5Variations = [ "gpt-5" , "gpt5" , "GPT-5" , "gpt-5-turbo" , "gpt5-preview" ]
206+ const gpt5Variations = [
207+ "gpt-5" ,
208+ "gpt5" ,
209+ "GPT-5" ,
210+ "gpt-5-turbo" ,
211+ "gpt5-preview" ,
212+ "gpt-5o" ,
213+ "gpt-5.1" ,
214+ "gpt-5-mini" ,
215+ ]
204216
205217 for ( const modelId of gpt5Variations ) {
206218 vi . clearAllMocks ( )
@@ -308,5 +320,72 @@ describe("LiteLLMHandler", () => {
308320 expect ( createCall . max_completion_tokens ) . toBeDefined ( )
309321 expect ( createCall . max_tokens ) . toBeUndefined ( )
310322 } )
323+
// Streaming path (createMessage): when the model info returned by fetchModel has
// maxTokens === undefined, the request handed to mockCreate must carry neither
// max_tokens nor max_completion_tokens.
324+ it ( "should not set any max token fields when maxTokens is undefined (GPT-5 streaming)" , async ( ) => {
// Same base options as the rest of the suite, but pointed at the "gpt-5" model id.
325+ const optionsWithGPT5 : ApiHandlerOptions = {
326+ ...mockOptions ,
327+ litellmModelId : "gpt-5" ,
328+ }
329+ handler = new LiteLLMHandler ( optionsWithGPT5 )
330+
331+ // Force fetchModel to return undefined maxTokens
// NOTE(review): the `as any` cast is presumably needed because fetchModel is not
// part of the handler's public surface — confirm against the class definition.
332+ vi . spyOn ( handler as any , "fetchModel" ) . mockResolvedValue ( {
333+ id : "gpt-5" ,
// The explicit `maxTokens : undefined` comes after the spread, so it overrides
// whatever maxTokens litellmDefaultModelInfo provides.
334+ info : { ...litellmDefaultModelInfo , maxTokens : undefined } ,
335+ } )
336+
337+ // Mock the stream response
// An async-iterable that yields exactly one chunk: a "Hello!" content delta plus
// a usage record (10 prompt tokens, 5 completion tokens).
338+ const mockStream = {
339+ async * [ Symbol . asyncIterator ] ( ) {
340+ yield {
341+ choices : [ { delta : { content : "Hello!" } } ] ,
342+ usage : {
343+ prompt_tokens : 10 ,
344+ completion_tokens : 5 ,
345+ } ,
346+ }
347+ } ,
348+ }
349+
// mockCreate returns an object whose withResponse() resolves to { data : mockStream }.
350+ mockCreate . mockReturnValue ( {
351+ withResponse : vi . fn ( ) . mockResolvedValue ( { data : mockStream } ) ,
352+ } )
353+
354+ const generator = handler . createMessage ( "You are a helpful assistant" , [
// NOTE(review): the `as unknown as` double assertion bypasses type checking; a plain
// { role : "user" , content : "Hello" } literal likely already satisfies
// Anthropic.Messages.MessageParam — TODO confirm and drop the cast.
355+ { role : "user" , content : "Hello" } as unknown as Anthropic . Messages . MessageParam ,
356+ ] )
// Drain the generator; the yielded chunks themselves are not inspected by this test.
357+ for await ( const _chunk of generator ) {
358+ // consume
359+ }
360+
361+ // Should not include either token field
// Inspects the first argument of the first recorded mockCreate call.
// NOTE(review): indexing calls[0] assumes the mock's call history was reset before
// this test (e.g. in a beforeEach outside this view) — confirm.
362+ const createCall = mockCreate . mock . calls [ 0 ] [ 0 ]
363+ expect ( createCall . max_tokens ) . toBeUndefined ( )
364+ expect ( createCall . max_completion_tokens ) . toBeUndefined ( )
365+ } )
366+
// Non-streaming path (completePrompt): when the model info returned by fetchModel
// has maxTokens === undefined, the request handed to mockCreate must carry neither
// max_tokens nor max_completion_tokens.
367+ it ( "should not set any max token fields when maxTokens is undefined (GPT-5 completePrompt)" , async ( ) => {
// Same base options as the rest of the suite, but pointed at the "gpt-5" model id.
368+ const optionsWithGPT5 : ApiHandlerOptions = {
369+ ...mockOptions ,
370+ litellmModelId : "gpt-5" ,
371+ }
372+ handler = new LiteLLMHandler ( optionsWithGPT5 )
373+
374+ // Force fetchModel to return undefined maxTokens
// NOTE(review): the `as any` cast is presumably needed because fetchModel is not
// part of the handler's public surface — confirm against the class definition.
375+ vi . spyOn ( handler as any , "fetchModel" ) . mockResolvedValue ( {
376+ id : "gpt-5" ,
// The explicit `maxTokens : undefined` comes after the spread, so it overrides
// whatever maxTokens litellmDefaultModelInfo provides.
377+ info : { ...litellmDefaultModelInfo , maxTokens : undefined } ,
378+ } )
379+
// Here mockCreate resolves directly with a response object holding a single
// choice whose message content is "Ok" (no withResponse wrapper, no stream).
380+ mockCreate . mockResolvedValue ( {
381+ choices : [ { message : { content : "Ok" } } ] ,
382+ } )
383+
// The resolved value of completePrompt is not checked; only the outgoing request is.
384+ await handler . completePrompt ( "Test prompt" )
385+
// Inspects the first argument of the first recorded mockCreate call.
// NOTE(review): indexing calls[0] assumes the mock's call history was reset before
// this test (e.g. in a beforeEach outside this view) — confirm.
386+ const createCall = mockCreate . mock . calls [ 0 ] [ 0 ]
387+ expect ( createCall . max_tokens ) . toBeUndefined ( )
388+ expect ( createCall . max_completion_tokens ) . toBeUndefined ( )
389+ } )
311390 } )
312391} )
0 commit comments