@@ -10,15 +10,9 @@ import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
1010vi . mock ( "vscode" , ( ) => ( { } ) )
1111
1212// Mock OpenAI
13- vi . mock ( "openai" , ( ) => {
14- const mockStream = {
15- [ Symbol . asyncIterator ] : vi . fn ( ) ,
16- }
17-
18- const mockCreate = vi . fn ( ) . mockReturnValue ( {
19- withResponse : vi . fn ( ) . mockResolvedValue ( { data : mockStream } ) ,
20- } )
// Shared spy standing in for the OpenAI client's `chat.completions.create`
// (presumably wired into the "openai" mock factory below; the tests read its
// recorded calls). Vitest hoists `vi.mock` above ordinary top-level statements,
// so the spy is created through `vi.hoisted` to guarantee it already exists
// whenever the factory runs, rather than relying on the factory only touching
// it lazily.
const mockCreate = vi.hoisted(() => vi.fn())
2114
15+ vi . mock ( "openai" , ( ) => {
2216 return {
2317 default : vi . fn ( ) . mockImplementation ( ( ) => ( {
2418 chat : {
@@ -35,14 +29,22 @@ vi.mock("../fetchers/modelCache", () => ({
getModels: vi.fn().mockImplementation(() => {
	// Extra model ids the tests look up, in the order they are registered.
	const extraModelIds = [
		"gpt-5",
		"gpt5",
		"GPT-5",
		"gpt-5-turbo",
		"gpt5-preview",
		"gpt-4",
		"claude-3-opus",
		"llama-3",
		"gpt-4-turbo",
	]

	// The default model keeps the stock info object untouched.
	const availableModels: Record<string, unknown> = {
		[litellmDefaultModelId]: litellmDefaultModelInfo,
	}

	// Every extra id gets its own copy of the default info with maxTokens set to 8192.
	for (const modelId of extraModelIds) {
		availableModels[modelId] = { ...litellmDefaultModelInfo, maxTokens: 8192 }
	}

	return Promise.resolve(availableModels)
}),
4043} ) )
4144
4245describe ( "LiteLLMHandler" , ( ) => {
4346 let handler : LiteLLMHandler
4447 let mockOptions : ApiHandlerOptions
45- let mockOpenAIClient : any
4648
4749 beforeEach ( ( ) => {
4850 vi . clearAllMocks ( )
@@ -52,7 +54,6 @@ describe("LiteLLMHandler", () => {
5254 litellmModelId : litellmDefaultModelId ,
5355 }
5456 handler = new LiteLLMHandler ( mockOptions )
55- mockOpenAIClient = new OpenAI ( )
5657 } )
5758
5859 describe ( "prompt caching" , ( ) => {
@@ -85,7 +86,7 @@ describe("LiteLLMHandler", () => {
8586 } ,
8687 }
8788
88- mockOpenAIClient . chat . completions . create . mockReturnValue ( {
89+ mockCreate . mockReturnValue ( {
8990 withResponse : vi . fn ( ) . mockResolvedValue ( { data : mockStream } ) ,
9091 } )
9192
@@ -96,7 +97,7 @@ describe("LiteLLMHandler", () => {
9697 }
9798
9899 // Verify that create was called with cache control headers
99- const createCall = mockOpenAIClient . chat . completions . create . mock . calls [ 0 ] [ 0 ]
100+ const createCall = mockCreate . mock . calls [ 0 ] [ 0 ]
100101
101102 // Check system message has cache control in the proper format
102103 expect ( createCall . messages [ 0 ] ) . toMatchObject ( {
@@ -155,4 +156,157 @@ describe("LiteLLMHandler", () => {
155156 } )
156157 } )
157158 } )
159+
describe("GPT-5 model handling", () => {
	/**
	 * Builds a minimal async-iterable stand-in for the streaming response.
	 * It yields exactly one chunk carrying `content` plus fixed usage counts.
	 */
	const createMockStream = (content: string) => ({
		async *[Symbol.asyncIterator]() {
			yield {
				choices: [{ delta: { content } }],
				usage: {
					prompt_tokens: 10,
					completion_tokens: 5,
				},
			}
		},
	})

	/**
	 * Re-creates the shared `handler` for `modelId`, runs one `createMessage`
	 * call to completion against a mocked stream, and returns the first
	 * argument that was passed to the mocked `create`.
	 *
	 * @param modelId - value used for `litellmModelId`
	 * @param userText - content of the single user message
	 * @param replyText - content of the single streamed chunk
	 */
	const streamAndCaptureRequest = async (modelId: string, userText: string, replyText: string) => {
		const options: ApiHandlerOptions = {
			...mockOptions,
			litellmModelId: modelId,
		}
		handler = new LiteLLMHandler(options)

		const systemPrompt = "You are a helpful assistant"
		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: userText }]

		mockCreate.mockReturnValue({
			withResponse: vi.fn().mockResolvedValue({ data: createMockStream(replyText) }),
		})

		for await (const _chunk of handler.createMessage(systemPrompt, messages)) {
			// Consume the generator; only the outgoing request is inspected.
		}

		// Fail with a readable assertion instead of a TypeError on `calls[0]`
		// when the handler never reached `create`.
		expect(mockCreate).toHaveBeenCalled()
		return mockCreate.mock.calls[0][0]
	}

	it("should use max_completion_tokens instead of max_tokens for GPT-5 models", async () => {
		const createCall = await streamAndCaptureRequest("gpt-5", "Hello", "Hello!")

		// Should have max_completion_tokens, not max_tokens
		expect(createCall.max_completion_tokens).toBeDefined()
		expect(createCall.max_tokens).toBeUndefined()
	})

	// One test case per id so a failure names the offending model. The outer
	// `beforeEach` already calls `vi.clearAllMocks()` before every case, so no
	// manual reset is needed between variations.
	it.each(["gpt-5", "gpt5", "GPT-5", "gpt-5-turbo", "gpt5-preview"])(
		"should use max_completion_tokens for GPT-5 model variation %s",
		async (modelId) => {
			const createCall = await streamAndCaptureRequest(modelId, "Test", "Response")

			expect(createCall.max_completion_tokens).toBeDefined()
			expect(createCall.max_tokens).toBeUndefined()
		},
	)

	it.each(["gpt-4", "claude-3-opus", "llama-3", "gpt-4-turbo"])(
		"should still use max_tokens for non-GPT-5 model %s",
		async (modelId) => {
			const createCall = await streamAndCaptureRequest(modelId, "Test", "Response")

			expect(createCall.max_tokens).toBeDefined()
			expect(createCall.max_completion_tokens).toBeUndefined()
		},
	)

	it("should use max_completion_tokens in completePrompt for GPT-5 models", async () => {
		const optionsWithGPT5: ApiHandlerOptions = {
			...mockOptions,
			litellmModelId: "gpt-5",
		}
		handler = new LiteLLMHandler(optionsWithGPT5)

		// completePrompt awaits `create` directly, so the mock resolves to a
		// plain (non-streaming) completion payload here.
		mockCreate.mockResolvedValue({
			choices: [{ message: { content: "Test response" } }],
		})

		await handler.completePrompt("Test prompt")

		// Verify that create was called with max_completion_tokens
		expect(mockCreate).toHaveBeenCalled()
		const createCall = mockCreate.mock.calls[0][0]

		expect(createCall.max_completion_tokens).toBeDefined()
		expect(createCall.max_tokens).toBeUndefined()
	})
})
158312} )
0 commit comments