From a113acc76984c953584982b26fab05ba7c0fa385 Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Thu, 2 Oct 2025 08:11:53 +0000
Subject: [PATCH] fix: use max_output_tokens when available in LiteLLM fetcher

- Prefer max_output_tokens over max_tokens for the maxTokens field
- Fixes issue where Claude Sonnet 4.5 via Google Vertex was using an incorrect token limit
- Added comprehensive test coverage for the new behavior

Fixes #8454
---
 .../fetchers/__tests__/litellm.spec.ts | 87 +++++++++++++++++++
 src/api/providers/fetchers/litellm.ts  |  2 +-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/src/api/providers/fetchers/__tests__/litellm.spec.ts b/src/api/providers/fetchers/__tests__/litellm.spec.ts
index f3a9d9971e..3500b0bd18 100644
--- a/src/api/providers/fetchers/__tests__/litellm.spec.ts
+++ b/src/api/providers/fetchers/__tests__/litellm.spec.ts
@@ -602,4 +602,91 @@ describe("getLiteLLMModels", () => {
 		expect(result["openrouter-claude"].supportsComputerUse).toBe(true)
 		expect(result["bedrock-claude"].supportsComputerUse).toBe(true)
 	})
+
+	it("prefers max_output_tokens over max_tokens when both are present", async () => {
+		const mockResponse = {
+			data: {
+				data: [
+					{
+						model_name: "claude-3-5-sonnet-4-5",
+						model_info: {
+							max_tokens: 200000, // This should be ignored
+							max_output_tokens: 64000, // This should be used
+							max_input_tokens: 200000,
+							supports_vision: true,
+							supports_prompt_caching: false,
+							supports_computer_use: true,
+						},
+						litellm_params: {
+							model: "anthropic/claude-3-5-sonnet-4-5",
+						},
+					},
+					{
+						model_name: "model-with-only-max-tokens",
+						model_info: {
+							max_tokens: 8192, // This should be used as fallback
+							// No max_output_tokens
+							max_input_tokens: 128000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-tokens",
+						},
+					},
+					{
+						model_name: "model-with-only-max-output-tokens",
+						model_info: {
+							// No max_tokens
+							max_output_tokens: 16384, // This should be used
+							max_input_tokens: 100000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-output-tokens",
+						},
+					},
+				],
+			},
+		}
+
+		mockedAxios.get.mockResolvedValue(mockResponse)
+
+		const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+		// Should use max_output_tokens (64000) instead of max_tokens (200000)
+		expect(result["claude-3-5-sonnet-4-5"]).toEqual({
+			maxTokens: 64000,
+			contextWindow: 200000,
+			supportsImages: true,
+			supportsComputerUse: true,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "claude-3-5-sonnet-4-5 via LiteLLM proxy",
+		})
+
+		// Should fall back to max_tokens when max_output_tokens is not present
+		expect(result["model-with-only-max-tokens"]).toEqual({
+			maxTokens: 8192,
+			contextWindow: 128000,
+			supportsImages: false,
+			supportsComputerUse: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "model-with-only-max-tokens via LiteLLM proxy",
+		})
+
+		// Should use max_output_tokens when max_tokens is not present
+		expect(result["model-with-only-max-output-tokens"]).toEqual({
+			maxTokens: 16384,
+			contextWindow: 100000,
+			supportsImages: false,
+			supportsComputerUse: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "model-with-only-max-output-tokens via LiteLLM proxy",
+		})
 })

diff --git a/src/api/providers/fetchers/litellm.ts b/src/api/providers/fetchers/litellm.ts
index e4e16c30e5..45d3eb06b9 100644
--- a/src/api/providers/fetchers/litellm.ts
+++ b/src/api/providers/fetchers/litellm.ts
@@ -56,7 +56,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 		}
 
 		models[modelName] = {
-			maxTokens: modelInfo.max_tokens || 8192,
+			maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192,
 			contextWindow: modelInfo.max_input_tokens || 200000,
 			supportsImages: Boolean(modelInfo.supports_vision),
 			// litellm_params.model may have a prefix like openrouter/
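
Reviewer note: a minimal standalone sketch of the fallback chain this patch
introduces. `LiteLLMModelInfo` and `resolveMaxTokens` below are hypothetical
names for illustration, not the fetcher's actual types or helpers; the real
code reads these fields off the model info response from the LiteLLM proxy.

	interface LiteLLMModelInfo {
		max_tokens?: number
		max_output_tokens?: number
		max_input_tokens?: number
	}

	function resolveMaxTokens(modelInfo: LiteLLMModelInfo): number {
		// Prefer the explicit output cap; otherwise fall back to max_tokens,
		// then to the 8192 default. Note that || also skips 0 and NaN, which
		// is acceptable here since a zero-token cap is not a meaningful value.
		return modelInfo.max_output_tokens || modelInfo.max_tokens || 8192
	}

	// The three cases covered by the new test:
	resolveMaxTokens({ max_tokens: 200000, max_output_tokens: 64000 }) // => 64000
	resolveMaxTokens({ max_tokens: 8192 }) // => 8192 (fallback)
	resolveMaxTokens({ max_output_tokens: 16384 }) // => 16384

The practical effect: for models such as Claude Sonnet 4.5 via Google Vertex,
where LiteLLM reports max_tokens equal to the context window (200000) but
max_output_tokens as the true output cap (64000), maxTokens no longer inherits
the context-window-sized value.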