@@ -9,15 +9,9 @@ import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
 vi.mock("vscode", () => ({}))
 
 // Mock OpenAI
-vi.mock("openai", () => {
-	const mockStream = {
-		[Symbol.asyncIterator]: vi.fn(),
-	}
-
-	const mockCreate = vi.fn().mockReturnValue({
-		withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
-	})
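+// Shared create() mock, hoisted to module scope so each test can set its own return value.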
+const mockCreate = vi.fn()
 
+vi.mock("openai", () => {
 	return {
 		default: vi.fn().mockImplementation(() => ({
 			chat: {
@@ -34,14 +28,25 @@ vi.mock("../fetchers/modelCache", () => ({
 	getModels: vi.fn().mockImplementation(() => {
 		return Promise.resolve({
 			[litellmDefaultModelId]: litellmDefaultModelInfo,
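+			// GPT-5 id variants (case and separator permutations) plus non-GPT-5 controls for the tests below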
31+ "gpt-5" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
32+ gpt5 : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
33+ "GPT-5" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
34+ "gpt-5-turbo" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
35+ "gpt5-preview" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
36+ "gpt-5o" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
37+ "gpt-5.1" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
38+ "gpt-5-mini" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
39+ "gpt-4" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
40+ "claude-3-opus" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
41+ "llama-3" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
42+ "gpt-4-turbo" : { ...litellmDefaultModelInfo , maxTokens : 8192 } ,
3743 } )
3844 } ) ,
3945} ) )
4046
4147describe ( "LiteLLMHandler" , ( ) => {
4248 let handler : LiteLLMHandler
4349 let mockOptions : ApiHandlerOptions
44- let mockOpenAIClient : any
4550
4651 beforeEach ( ( ) => {
4752 vi . clearAllMocks ( )
@@ -51,7 +56,6 @@ describe("LiteLLMHandler", () => {
 			litellmModelId: litellmDefaultModelId,
 		}
 		handler = new LiteLLMHandler(mockOptions)
-		mockOpenAIClient = new OpenAI()
 	})
 
 	describe("prompt caching", () => {
@@ -84,7 +88,7 @@ describe("LiteLLMHandler", () => {
 				},
 			}
 
-			mockOpenAIClient.chat.completions.create.mockReturnValue({
+			mockCreate.mockReturnValue({
 				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
 			})
 
@@ -95,7 +99,7 @@ describe("LiteLLMHandler", () => {
 			}
 
 			// Verify that create was called with cache control headers
-			const createCall = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
+			const createCall = mockCreate.mock.calls[0][0]
 
 			// Check system message has cache control in the proper format
 			expect(createCall.messages[0]).toMatchObject({
@@ -154,4 +158,233 @@ describe("LiteLLMHandler", () => {
 			})
 		})
 	})
+
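+	// Newer OpenAI models in the GPT-5 family take max_completion_tokens in place of the
+	// legacy max_tokens parameter; these tests pin the handler's id-based field selection.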
+	describe("GPT-5 model handling", () => {
+		it("should use max_completion_tokens instead of max_tokens for GPT-5 models", async () => {
+			const optionsWithGPT5: ApiHandlerOptions = {
+				...mockOptions,
+				litellmModelId: "gpt-5",
+			}
+			handler = new LiteLLMHandler(optionsWithGPT5)
+
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			// Mock the stream response
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { content: "Hello!" } }],
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify that create was called with max_completion_tokens instead of max_tokens
+			const createCall = mockCreate.mock.calls[0][0]
+
+			// Should have max_completion_tokens, not max_tokens
+			expect(createCall.max_completion_tokens).toBeDefined()
+			expect(createCall.max_tokens).toBeUndefined()
+		})
+
+		it("should use max_completion_tokens for various GPT-5 model variations", async () => {
+			const gpt5Variations = [
+				"gpt-5",
+				"gpt5",
+				"GPT-5",
+				"gpt-5-turbo",
+				"gpt5-preview",
+				"gpt-5o",
+				"gpt-5.1",
+				"gpt-5-mini",
+			]
+
+			for (const modelId of gpt5Variations) {
+				vi.clearAllMocks()
+
+				const optionsWithGPT5: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: modelId,
+				}
+				handler = new LiteLLMHandler(optionsWithGPT5)
+
+				const systemPrompt = "You are a helpful assistant"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]
+
+				// Mock the stream response
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "Response" } }],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+							},
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				for await (const chunk of generator) {
+					// Consume the generator
+				}
+
+				// Verify that create was called with max_completion_tokens for this model variation
+				const createCall = mockCreate.mock.calls[0][0]
+
+				expect(createCall.max_completion_tokens).toBeDefined()
+				expect(createCall.max_tokens).toBeUndefined()
+			}
+		})
+
+		it("should still use max_tokens for non-GPT-5 models", async () => {
+			const nonGPT5Models = ["gpt-4", "claude-3-opus", "llama-3", "gpt-4-turbo"]
+
+			for (const modelId of nonGPT5Models) {
+				vi.clearAllMocks()
+
+				const options: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: modelId,
+				}
+				handler = new LiteLLMHandler(options)
+
+				const systemPrompt = "You are a helpful assistant"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]
+
+				// Mock the stream response
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "Response" } }],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+							},
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				for await (const chunk of generator) {
+					// Consume the generator
+				}
+
+				// Verify that create was called with max_tokens for non-GPT-5 models
+				const createCall = mockCreate.mock.calls[0][0]
+
+				expect(createCall.max_tokens).toBeDefined()
+				expect(createCall.max_completion_tokens).toBeUndefined()
+			}
+		})
+
+		it("should use max_completion_tokens in completePrompt for GPT-5 models", async () => {
+			const optionsWithGPT5: ApiHandlerOptions = {
+				...mockOptions,
+				litellmModelId: "gpt-5",
+			}
+			handler = new LiteLLMHandler(optionsWithGPT5)
+
+			mockCreate.mockResolvedValue({
+				choices: [{ message: { content: "Test response" } }],
+			})
+
+			await handler.completePrompt("Test prompt")
+
+			// Verify that create was called with max_completion_tokens
+			const createCall = mockCreate.mock.calls[0][0]
+
+			expect(createCall.max_completion_tokens).toBeDefined()
+			expect(createCall.max_tokens).toBeUndefined()
+		})
+
+		it("should not set any max token fields when maxTokens is undefined (GPT-5 streaming)", async () => {
+			const optionsWithGPT5: ApiHandlerOptions = {
+				...mockOptions,
+				litellmModelId: "gpt-5",
+			}
+			handler = new LiteLLMHandler(optionsWithGPT5)
+
+			// Force fetchModel to return undefined maxTokens
+			vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+				id: "gpt-5",
+				info: { ...litellmDefaultModelInfo, maxTokens: undefined },
+			})
+
+			// Mock the stream response
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { content: "Hello!" } }],
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage("You are a helpful assistant", [
+				{ role: "user", content: "Hello" } as unknown as Anthropic.Messages.MessageParam,
+			])
+			for await (const _chunk of generator) {
+				// consume
+			}
+
+			// Should not include either token field
+			const createCall = mockCreate.mock.calls[0][0]
+			expect(createCall.max_tokens).toBeUndefined()
+			expect(createCall.max_completion_tokens).toBeUndefined()
+		})
+
+		it("should not set any max token fields when maxTokens is undefined (GPT-5 completePrompt)", async () => {
+			const optionsWithGPT5: ApiHandlerOptions = {
+				...mockOptions,
+				litellmModelId: "gpt-5",
+			}
+			handler = new LiteLLMHandler(optionsWithGPT5)
+
+			// Force fetchModel to return undefined maxTokens
+			vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+				id: "gpt-5",
+				info: { ...litellmDefaultModelInfo, maxTokens: undefined },
+			})
+
+			mockCreate.mockResolvedValue({
+				choices: [{ message: { content: "Ok" } }],
+			})
+
+			await handler.completePrompt("Test prompt")
+
+			const createCall = mockCreate.mock.calls[0][0]
+			expect(createCall.max_tokens).toBeUndefined()
+			expect(createCall.max_completion_tokens).toBeUndefined()
+		})
+	})
 })
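
For reference, a minimal sketch of the id matching and field selection these tests imply. The helper names and the regex are assumptions inferred from the test cases, not code taken from `LiteLLMHandler`:

```typescript
// Illustrative only: isGpt5ModelId and buildTokenParams are hypothetical names,
// and the regex is inferred from the id variants exercised in the tests above.

/** True for ids like "gpt-5", "gpt5", "GPT-5", "gpt-5-mini", "gpt5-preview"; false for "gpt-4" etc. */
function isGpt5ModelId(modelId: string): boolean {
	return /^gpt-?5/i.test(modelId)
}

/** Picks the token-limit field by model family; emits neither field when maxTokens is unset. */
function buildTokenParams(
	modelId: string,
	maxTokens: number | undefined,
): { max_tokens?: number; max_completion_tokens?: number } {
	if (maxTokens === undefined) return {}
	return isGpt5ModelId(modelId) ? { max_completion_tokens: maxTokens } : { max_tokens: maxTokens }
}
```

Under these assumptions, `buildTokenParams("gpt-5", undefined)` yields `{}`, which is exactly what the two undefined-maxTokens tests assert.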