Merged
src/api/providers/fetchers/__tests__/litellm.spec.ts (87 additions & 0 deletions)
@@ -602,4 +602,91 @@ describe("getLiteLLMModels", () => {
expect(result["openrouter-claude"].supportsComputerUse).toBe(true)
expect(result["bedrock-claude"].supportsComputerUse).toBe(true)
})

it("prefers max_output_tokens over max_tokens when both are present", async () => {
Contributor Author:
[P3] Tests: Missing default-fallback case. The PR description mentions covering 'neither field present', but there is no test asserting that maxTokens defaults to 8192 when both max_output_tokens and max_tokens are absent. Adding one would lock in the intended behavior and prevent regressions; a sketch of such a test follows the diff below.

		const mockResponse = {
			data: {
				data: [
					{
						model_name: "claude-3-5-sonnet-4-5",
						model_info: {
							max_tokens: 200000, // This should be ignored
							max_output_tokens: 64000, // This should be used
							max_input_tokens: 200000,
							supports_vision: true,
							supports_prompt_caching: false,
							supports_computer_use: true,
						},
						litellm_params: {
							model: "anthropic/claude-3-5-sonnet-4-5",
						},
					},
					{
						model_name: "model-with-only-max-tokens",
						model_info: {
							max_tokens: 8192, // This should be used as fallback
							// No max_output_tokens
							max_input_tokens: 128000,
							supports_vision: false,
						},
						litellm_params: {
							model: "test/model-with-only-max-tokens",
						},
					},
					{
						model_name: "model-with-only-max-output-tokens",
						model_info: {
							// No max_tokens
							max_output_tokens: 16384, // This should be used
							max_input_tokens: 100000,
							supports_vision: false,
						},
						litellm_params: {
							model: "test/model-with-only-max-output-tokens",
						},
					},
				],
			},
		}

		mockedAxios.get.mockResolvedValue(mockResponse)

		const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")

		// Should use max_output_tokens (64000) instead of max_tokens (200000)
		expect(result["claude-3-5-sonnet-4-5"]).toEqual({
			maxTokens: 64000,
			contextWindow: 200000,
			supportsImages: true,
			supportsComputerUse: true,
			supportsPromptCache: false,
			inputPrice: undefined,
			outputPrice: undefined,
			description: "claude-3-5-sonnet-4-5 via LiteLLM proxy",
		})

		// Should fall back to max_tokens when max_output_tokens is not present
		expect(result["model-with-only-max-tokens"]).toEqual({
			maxTokens: 8192,
			contextWindow: 128000,
			supportsImages: false,
			supportsComputerUse: false,
			supportsPromptCache: false,
			inputPrice: undefined,
			outputPrice: undefined,
			description: "model-with-only-max-tokens via LiteLLM proxy",
		})

		// Should use max_output_tokens when max_tokens is not present
		expect(result["model-with-only-max-output-tokens"]).toEqual({
			maxTokens: 16384,
			contextWindow: 100000,
			supportsImages: false,
			supportsComputerUse: false,
			supportsPromptCache: false,
			inputPrice: undefined,
			outputPrice: undefined,
			description: "model-with-only-max-output-tokens via LiteLLM proxy",
		})
	})
})
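
Per the [P3] review comment above, here is a minimal sketch of the missing default-fallback test, reusing the mock shape and assertion style of the tests in this diff; the model name is hypothetical and chosen only for illustration:

it("defaults maxTokens to 8192 when neither max_output_tokens nor max_tokens is present", async () => {
	const mockResponse = {
		data: {
			data: [
				{
					model_name: "model-with-no-token-fields", // hypothetical name, for illustration only
					model_info: {
						// Neither max_tokens nor max_output_tokens is present
						max_input_tokens: 128000,
						supports_vision: false,
					},
					litellm_params: {
						model: "test/model-with-no-token-fields",
					},
				},
			],
		},
	}

	mockedAxios.get.mockResolvedValue(mockResponse)

	const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")

	// Should fall back to the hardcoded 8192 default
	expect(result["model-with-no-token-fields"].maxTokens).toBe(8192)
})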
src/api/providers/fetchers/litellm.ts (1 addition & 1 deletion)
@@ -56,7 +56,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
		}

		models[modelName] = {
-			maxTokens: modelInfo.max_tokens || 8192,
+			maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192,
			contextWindow: modelInfo.max_input_tokens || 200000,
			supportsImages: Boolean(modelInfo.supports_vision),
			// litellm_params.model may have a prefix like openrouter/
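
For reference, a standalone sketch of how the new fallback chain resolves (illustration only, not code from the PR; resolveMaxTokens is a hypothetical helper). Note that || treats 0 as falsy, so an explicit max_output_tokens of 0 would fall through to the next candidate:

// Mirrors the fallback logic added in this PR, extracted for illustration.
const resolveMaxTokens = (info: { max_output_tokens?: number; max_tokens?: number }): number =>
	info.max_output_tokens || info.max_tokens || 8192

resolveMaxTokens({ max_output_tokens: 64000, max_tokens: 200000 }) // 64000: max_output_tokens wins
resolveMaxTokens({ max_tokens: 8192 }) // 8192: falls back to max_tokens
resolveMaxTokens({}) // 8192: hardcoded default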