-
Notifications
You must be signed in to change notification settings - Fork 2.6k
feat: simplify browser use detection to use image support #6867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,39 +15,6 @@ export const litellmDefaultModelInfo: ModelInfo = { | |
| cacheReadsPrice: 0.3, | ||
| } | ||
|
|
||
| export const LITELLM_COMPUTER_USE_MODELS = new Set([ | ||
| "claude-3-5-sonnet-latest", | ||
| "claude-opus-4-1-20250805", | ||
| "claude-opus-4-20250514", | ||
| "claude-sonnet-4-20250514", | ||
| "claude-3-7-sonnet-latest", | ||
| "claude-3-7-sonnet-20250219", | ||
| "claude-3-5-sonnet-20241022", | ||
| "vertex_ai/claude-3-5-sonnet", | ||
| "vertex_ai/claude-3-5-sonnet-v2", | ||
| "vertex_ai/claude-3-5-sonnet-v2@20241022", | ||
| "vertex_ai/claude-3-7-sonnet@20250219", | ||
| "vertex_ai/claude-opus-4-1@20250805", | ||
| "vertex_ai/claude-opus-4@20250514", | ||
| "vertex_ai/claude-sonnet-4@20250514", | ||
| "openrouter/anthropic/claude-3.5-sonnet", | ||
| "openrouter/anthropic/claude-3.5-sonnet:beta", | ||
| "openrouter/anthropic/claude-3.7-sonnet", | ||
| "openrouter/anthropic/claude-3.7-sonnet:beta", | ||
| "anthropic.claude-opus-4-1-20250805-v1:0", | ||
| "anthropic.claude-opus-4-20250514-v1:0", | ||
| "anthropic.claude-sonnet-4-20250514-v1:0", | ||
| "anthropic.claude-3-7-sonnet-20250219-v1:0", | ||
| "anthropic.claude-3-5-sonnet-20241022-v2:0", | ||
| "us.anthropic.claude-3-5-sonnet-20241022-v2:0", | ||
| "us.anthropic.claude-3-7-sonnet-20250219-v1:0", | ||
| "us.anthropic.claude-opus-4-1-20250805-v1:0", | ||
| "us.anthropic.claude-opus-4-20250514-v1:0", | ||
| "us.anthropic.claude-sonnet-4-20250514-v1:0", | ||
| "eu.anthropic.claude-3-5-sonnet-20241022-v2:0", | ||
| "eu.anthropic.claude-3-7-sonnet-20250219-v1:0", | ||
| "eu.anthropic.claude-opus-4-1-20250805-v1:0", | ||
| "eu.anthropic.claude-opus-4-20250514-v1:0", | ||
| "eu.anthropic.claude-sonnet-4-20250514-v1:0", | ||
| "snowflake/claude-3-5-sonnet", | ||
| ]) | ||
| // Computer use capability is now determined by image support | ||
| // Any model that supports images can theoretically use browser tools | ||
|
Contributor
Author
There was a problem hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this approach perhaps too permissive? We're enabling browser use for ALL models with image support, including models that may not have been designed or tested for browser automation (e.g., image generation models, basic vision models). Could we consider adding a denylist for known incompatible models, or requiring models to opt in rather than being automatically enabled? |
||
| // This approach is simpler and more inclusive than maintaining hardcoded lists | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,5 @@ | ||
| import axios from "axios" | ||
|
|
||
| import { LITELLM_COMPUTER_USE_MODELS } from "@roo-code/types" | ||
|
|
||
| import type { ModelRecord } from "../../../shared/api" | ||
|
|
||
| import { DEFAULT_HEADERS } from "../constants" | ||
|
|
@@ -33,33 +31,28 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise | |
| const response = await axios.get(url, { headers, timeout: 5000 }) | ||
| const models: ModelRecord = {} | ||
|
|
||
| const computerModels = Array.from(LITELLM_COMPUTER_USE_MODELS) | ||
|
|
||
| // Process the model info from the response | ||
| if (response.data && response.data.data && Array.isArray(response.data.data)) { | ||
| for (const model of response.data.data) { | ||
| const modelName = model.model_name | ||
| const modelInfo = model.model_info | ||
| const litellmModelName = model?.litellm_params?.model as string | undefined | ||
|
|
||
| if (!modelName || !modelInfo || !litellmModelName) continue | ||
| if (!modelName || !modelInfo) continue | ||
|
|
||
| // Use explicit supports_computer_use if available, otherwise fall back to hardcoded list | ||
| // Use explicit supports_computer_use if available, otherwise use image support | ||
| let supportsComputerUse: boolean | ||
| if (modelInfo.supports_computer_use !== undefined) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good implementation of the fallback logic. The explicit `supports_computer_use` field takes precedence, with image support used only as the fallback. |
||
| supportsComputerUse = Boolean(modelInfo.supports_computer_use) | ||
| } else { | ||
| // Fallback for older LiteLLM versions that don't have supports_computer_use field | ||
| supportsComputerUse = computerModels.some((computer_model) => | ||
| litellmModelName.endsWith(computer_model), | ||
| ) | ||
| // Browser automation requires screenshot analysis, which requires image/vision capabilities | ||
| // Any model that can process images can theoretically use the browser tool | ||
| supportsComputerUse = Boolean(modelInfo.supports_vision) | ||
| } | ||
|
|
||
| models[modelName] = { | ||
| maxTokens: modelInfo.max_tokens || 8192, | ||
| contextWindow: modelInfo.max_input_tokens || 200000, | ||
| supportsImages: Boolean(modelInfo.supports_vision), | ||
| // litellm_params.model may have a prefix like openrouter/ | ||
| supportsComputerUse, | ||
| supportsPromptCache: Boolean(modelInfo.supports_prompt_caching), | ||
| inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These comments are identical in both files. Could we make them slightly more specific to each context? For example, here we could mention that LiteLLM can override this with an explicit
`supports_computer_use` field.