fix: use max_output_tokens when available in LiteLLM fetcher (#8455)

roomote[bot] · roomote · web-flow · commit bde2c3cec6a2 · 2025-10-27T16:58:45.000-04:00
Co-authored-by: Roo Code <roomote@roocode.com>
diff --git a/src/api/providers/fetchers/__tests__/litellm.spec.ts b/src/api/providers/fetchers/__tests__/litellm.spec.ts
@@ -589,4 +589,91 @@ describe("getLiteLLMModels", () => {
 
 		const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
 	})
+
+	it("prefers max_output_tokens over max_tokens when both are present", async () => {
+		const mockResponse = {
+			data: {
+				data: [
+					{
+						model_name: "claude-3-5-sonnet-4-5",
+						model_info: {
+							max_tokens: 200000, // Both limits present: this one is expected to be ignored
+							max_output_tokens: 64000, // Both limits present: this one is expected to become maxTokens
+							max_input_tokens: 200000,
+							supports_vision: true,
+							supports_prompt_caching: false,
+							supports_computer_use: true,
+						},
+						litellm_params: {
+							model: "anthropic/claude-3-5-sonnet-4-5",
+						},
+					},
+					{
+						model_name: "model-with-only-max-tokens",
+						model_info: {
+							max_tokens: 8192, // Only limit present, so it is the expected maxTokens (fallback path)
+							// max_output_tokens deliberately omitted
+							max_input_tokens: 128000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-tokens",
+						},
+					},
+					{
+						model_name: "model-with-only-max-output-tokens",
+						model_info: {
+							// max_tokens deliberately omitted
+							max_output_tokens: 16384, // Only limit present, so it is the expected maxTokens
+							max_input_tokens: 100000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-output-tokens",
+						},
+					},
+				],
+			},
+		}
+
+		mockedAxios.get.mockResolvedValue(mockResponse) // mocked axios.get resolves with the fixture above
+
+		const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+		// Case 1 - both fields present: maxTokens must be max_output_tokens (64000), not max_tokens (200000)
+		expect(result["claude-3-5-sonnet-4-5"]).toEqual({
+			maxTokens: 64000,
+			contextWindow: 200000,
+			supportsImages: true,
+			supportsComputerUse: true,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "claude-3-5-sonnet-4-5 via LiteLLM proxy",
+		})
+
+		// Case 2 - only max_tokens present: maxTokens must fall back to max_tokens (8192)
+		expect(result["model-with-only-max-tokens"]).toEqual({
+			maxTokens: 8192,
+			contextWindow: 128000,
+			supportsImages: false,
+			supportsComputerUse: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "model-with-only-max-tokens via LiteLLM proxy",
+		})
+
+		// Case 3 - only max_output_tokens present: maxTokens must be max_output_tokens (16384)
+		expect(result["model-with-only-max-output-tokens"]).toEqual({
+			maxTokens: 16384,
+			contextWindow: 100000,
+			supportsImages: false,
+			supportsComputerUse: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			description: "model-with-only-max-output-tokens via LiteLLM proxy",
+		})
+	})
 })
diff --git a/src/api/providers/fetchers/litellm.ts b/src/api/providers/fetchers/litellm.ts
@@ -41,7 +41,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 				if (!modelName || !modelInfo || !litellmModelName) continue
 
 				models[modelName] = {
-					maxTokens: modelInfo.max_tokens || 8192,
+					maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192,
 					contextWindow: modelInfo.max_input_tokens || 200000,
 					supportsImages: Boolean(modelInfo.supports_vision),
 					// litellm_params.model may have a prefix like openrouter/