Commit a113acc

fix: use max_output_tokens when available in LiteLLM fetcher
- Prefer max_output_tokens over max_tokens for the maxTokens field
- Fixes an issue where Claude Sonnet 4.5 via Google Vertex was using an incorrect token limit
- Added comprehensive test coverage for the new behavior

Fixes #8454
1 parent 13534cc commit a113acc
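
For context, the failure mode being fixed, sketched with the values from the Claude Sonnet 4.5 test fixture added below (the payload shape here is illustrative, not the full LiteLLM response): some providers report max_tokens equal to the model's context window, while max_output_tokens carries the actual output cap.

// Illustrative values, mirroring the test fixture in litellm.spec.ts below.
const modelInfo = {
	max_tokens: 200000, // mirrors the 200k context window, not a usable output cap
	max_output_tokens: 64000, // the real output limit
	max_input_tokens: 200000,
}

// Before this commit: maxTokens = modelInfo.max_tokens || 8192
//   evaluates to 200000, far above what the model will actually emit
// After this commit:  maxTokens = modelInfo.max_output_tokens || modelInfo.max_tokens || 8192
//   evaluates to 64000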

File tree

2 files changed: +88 −1 lines changed


src/api/providers/fetchers/__tests__/litellm.spec.ts

Lines changed: 87 additions & 0 deletions
@@ -602,4 +602,91 @@ describe("getLiteLLMModels", () => {
 		expect(result["openrouter-claude"].supportsComputerUse).toBe(true)
 		expect(result["bedrock-claude"].supportsComputerUse).toBe(true)
 	})
+
+	it("prefers max_output_tokens over max_tokens when both are present", async () => {
+		const mockResponse = {
+			data: {
+				data: [
+					{
+						model_name: "claude-3-5-sonnet-4-5",
+						model_info: {
+							max_tokens: 200000, // This should be ignored
+							max_output_tokens: 64000, // This should be used
+							max_input_tokens: 200000,
+							supports_vision: true,
+							supports_prompt_caching: false,
+							supports_computer_use: true,
+						},
+						litellm_params: {
+							model: "anthropic/claude-3-5-sonnet-4-5",
+						},
+					},
+					{
+						model_name: "model-with-only-max-tokens",
+						model_info: {
+							max_tokens: 8192, // This should be used as fallback
+							// No max_output_tokens
+							max_input_tokens: 128000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-tokens",
+						},
+					},
+					{
+						model_name: "model-with-only-max-output-tokens",
+						model_info: {
+							// No max_tokens
+							max_output_tokens: 16384, // This should be used
+							max_input_tokens: 100000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-output-tokens",
+						},
+					},
+				],
+			},
+		}
+
+		mockedAxios.get.mockResolvedValue(mockResponse)
+
+		const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+		// Should use max_output_tokens (64000) instead of max_tokens (200000)
+		expect(result["claude-3-5-sonnet-4-5"]).toEqual({
+			maxTokens: 64000,
+			contextWindow: 200000,
+			supportsImages: true,
+			supportsComputerUse: true,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "claude-3-5-sonnet-4-5 via LiteLLM proxy",
+		})
+
+		// Should fall back to max_tokens when max_output_tokens is not present
+		expect(result["model-with-only-max-tokens"]).toEqual({
+			maxTokens: 8192,
+			contextWindow: 128000,
+			supportsImages: false,
+			supportsComputerUse: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "model-with-only-max-tokens via LiteLLM proxy",
+		})
+
+		// Should use max_output_tokens when max_tokens is not present
+		expect(result["model-with-only-max-output-tokens"]).toEqual({
+			maxTokens: 16384,
+			contextWindow: 100000,
+			supportsImages: false,
+			supportsComputerUse: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "model-with-only-max-output-tokens via LiteLLM proxy",
+		})
+	})
 })

src/api/providers/fetchers/litellm.ts

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 	}

 	models[modelName] = {
-		maxTokens: modelInfo.max_tokens || 8192,
+		maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192,
 		contextWindow: modelInfo.max_input_tokens || 200000,
 		supportsImages: Boolean(modelInfo.supports_vision),
 		// litellm_params.model may have a prefix like openrouter/
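
A note on the fallback choice in the changed line: it uses || rather than ??, so any falsy value, including an explicit 0, falls through to the next candidate. A minimal standalone sketch of the precedence (the function and interface names here are hypothetical, introduced only for illustration):

// Hypothetical helper, for illustration only; the actual change is the
// one-line fallback chain shown in the diff above.
interface LiteLLMModelInfo {
	max_tokens?: number
	max_output_tokens?: number
}

function resolveMaxTokens(modelInfo: LiteLLMModelInfo): number {
	// max_output_tokens wins when truthy; otherwise max_tokens; otherwise 8192.
	// Using || (not ??) means 0, null, and undefined all fall through.
	return modelInfo.max_output_tokens || modelInfo.max_tokens || 8192
}

resolveMaxTokens({ max_tokens: 200000, max_output_tokens: 64000 }) // 64000
resolveMaxTokens({ max_tokens: 8192 }) // 8192
resolveMaxTokens({}) // 8192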
