Skip to content

Commit f89b23c

Browse files
committed
refactor(litellm): centralize GPT-5 detection; expand variants; add undefined maxTokens guards and tests
1 parent 3d576b1 commit f89b23c

File tree

2 files changed (+88, −3 lines changed)

src/api/providers/__tests__/lite-llm.spec.ts

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ vi.mock("../fetchers/modelCache", () => ({
3434
"GPT-5": { ...litellmDefaultModelInfo, maxTokens: 8192 },
3535
"gpt-5-turbo": { ...litellmDefaultModelInfo, maxTokens: 8192 },
3636
"gpt5-preview": { ...litellmDefaultModelInfo, maxTokens: 8192 },
37+
"gpt-5o": { ...litellmDefaultModelInfo, maxTokens: 8192 },
38+
"gpt-5.1": { ...litellmDefaultModelInfo, maxTokens: 8192 },
39+
"gpt-5-mini": { ...litellmDefaultModelInfo, maxTokens: 8192 },
3740
"gpt-4": { ...litellmDefaultModelInfo, maxTokens: 8192 },
3841
"claude-3-opus": { ...litellmDefaultModelInfo, maxTokens: 8192 },
3942
"llama-3": { ...litellmDefaultModelInfo, maxTokens: 8192 },
@@ -200,7 +203,16 @@ describe("LiteLLMHandler", () => {
200203
})
201204

202205
it("should use max_completion_tokens for various GPT-5 model variations", async () => {
203-
const gpt5Variations = ["gpt-5", "gpt5", "GPT-5", "gpt-5-turbo", "gpt5-preview"]
206+
const gpt5Variations = [
207+
"gpt-5",
208+
"gpt5",
209+
"GPT-5",
210+
"gpt-5-turbo",
211+
"gpt5-preview",
212+
"gpt-5o",
213+
"gpt-5.1",
214+
"gpt-5-mini",
215+
]
204216

205217
for (const modelId of gpt5Variations) {
206218
vi.clearAllMocks()
@@ -308,5 +320,72 @@ describe("LiteLLMHandler", () => {
308320
expect(createCall.max_completion_tokens).toBeDefined()
309321
expect(createCall.max_tokens).toBeUndefined()
310322
})
323+
324+
it("should not set any max token fields when maxTokens is undefined (GPT-5 streaming)", async () => {
325+
const optionsWithGPT5: ApiHandlerOptions = {
326+
...mockOptions,
327+
litellmModelId: "gpt-5",
328+
}
329+
handler = new LiteLLMHandler(optionsWithGPT5)
330+
331+
// Force fetchModel to return undefined maxTokens
332+
vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
333+
id: "gpt-5",
334+
info: { ...litellmDefaultModelInfo, maxTokens: undefined },
335+
})
336+
337+
// Mock the stream response
338+
const mockStream = {
339+
async *[Symbol.asyncIterator]() {
340+
yield {
341+
choices: [{ delta: { content: "Hello!" } }],
342+
usage: {
343+
prompt_tokens: 10,
344+
completion_tokens: 5,
345+
},
346+
}
347+
},
348+
}
349+
350+
mockCreate.mockReturnValue({
351+
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
352+
})
353+
354+
const generator = handler.createMessage("You are a helpful assistant", [
355+
{ role: "user", content: "Hello" } as unknown as Anthropic.Messages.MessageParam,
356+
])
357+
for await (const _chunk of generator) {
358+
// consume
359+
}
360+
361+
// Should not include either token field
362+
const createCall = mockCreate.mock.calls[0][0]
363+
expect(createCall.max_tokens).toBeUndefined()
364+
expect(createCall.max_completion_tokens).toBeUndefined()
365+
})
366+
367+
it("should not set any max token fields when maxTokens is undefined (GPT-5 completePrompt)", async () => {
368+
const optionsWithGPT5: ApiHandlerOptions = {
369+
...mockOptions,
370+
litellmModelId: "gpt-5",
371+
}
372+
handler = new LiteLLMHandler(optionsWithGPT5)
373+
374+
// Force fetchModel to return undefined maxTokens
375+
vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
376+
id: "gpt-5",
377+
info: { ...litellmDefaultModelInfo, maxTokens: undefined },
378+
})
379+
380+
mockCreate.mockResolvedValue({
381+
choices: [{ message: { content: "Ok" } }],
382+
})
383+
384+
await handler.completePrompt("Test prompt")
385+
386+
const createCall = mockCreate.mock.calls[0][0]
387+
expect(createCall.max_tokens).toBeUndefined()
388+
expect(createCall.max_completion_tokens).toBeUndefined()
389+
})
311390
})
312391
})

src/api/providers/lite-llm.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
3232
})
3333
}
3434

35+
/**
 * Whether the given model id belongs to the GPT-5 family, which requires
 * `max_completion_tokens` instead of `max_tokens` on requests.
 *
 * Matches gpt-5 / gpt5 and suffixed variants (gpt-5o, gpt-5-turbo,
 * gpt5-preview, gpt-5.1) while rejecting gpt-50, gpt-500, etc.
 */
private isGpt5(modelId: string): boolean {
	const gpt5Family = /\bgpt-?5(?!\d)/i
	return gpt5Family.test(modelId)
}
40+
3541
override async *createMessage(
3642
systemPrompt: string,
3743
messages: Anthropic.Messages.MessageParam[],
@@ -108,7 +114,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
108114
let maxTokens: number | undefined = info.maxTokens ?? undefined
109115

110116
// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
111-
const isGPT5Model = modelId.toLowerCase().includes("gpt-5") || modelId.toLowerCase().includes("gpt5")
117+
const isGPT5Model = this.isGpt5(modelId)
112118

113119
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
114120
model: modelId,
@@ -190,7 +196,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
190196
const { id: modelId, info } = await this.fetchModel()
191197

192198
// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
193-
const isGPT5Model = modelId.toLowerCase().includes("gpt-5") || modelId.toLowerCase().includes("gpt5")
199+
const isGPT5Model = this.isGpt5(modelId)
194200

195201
try {
196202
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {

0 commit comments

Comments (0)