Skip to content

Commit 7ba48a1

Browse files
committed
feat: simplify browser use detection to use image support
- Remove hardcoded OPEN_ROUTER_COMPUTER_USE_MODELS and LITELLM_COMPUTER_USE_MODELS lists
- Update logic to enable browser/computer use for any model that supports images
- This approach is simpler and more inclusive, as browser automation requires screenshot analysis, which needs image/vision capabilities
- Update tests to reflect the new image-based detection logic

This change aligns with Cline's approach, where any model with image support can theoretically use browser tools, making the system more maintainable and avoiding the need to constantly update hardcoded model lists.
1 parent ad0e33e commit 7ba48a1

File tree

6 files changed

+61
-130
lines changed

6 files changed

+61
-130
lines changed

packages/types/src/providers/lite-llm.ts

Lines changed: 3 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,39 +15,6 @@ export const litellmDefaultModelInfo: ModelInfo = {
1515
cacheReadsPrice: 0.3,
1616
}
1717

18-
export const LITELLM_COMPUTER_USE_MODELS = new Set([
19-
"claude-3-5-sonnet-latest",
20-
"claude-opus-4-1-20250805",
21-
"claude-opus-4-20250514",
22-
"claude-sonnet-4-20250514",
23-
"claude-3-7-sonnet-latest",
24-
"claude-3-7-sonnet-20250219",
25-
"claude-3-5-sonnet-20241022",
26-
"vertex_ai/claude-3-5-sonnet",
27-
"vertex_ai/claude-3-5-sonnet-v2",
28-
"vertex_ai/claude-3-5-sonnet-v2@20241022",
29-
"vertex_ai/claude-3-7-sonnet@20250219",
30-
"vertex_ai/claude-opus-4-1@20250805",
31-
"vertex_ai/claude-opus-4@20250514",
32-
"vertex_ai/claude-sonnet-4@20250514",
33-
"openrouter/anthropic/claude-3.5-sonnet",
34-
"openrouter/anthropic/claude-3.5-sonnet:beta",
35-
"openrouter/anthropic/claude-3.7-sonnet",
36-
"openrouter/anthropic/claude-3.7-sonnet:beta",
37-
"anthropic.claude-opus-4-1-20250805-v1:0",
38-
"anthropic.claude-opus-4-20250514-v1:0",
39-
"anthropic.claude-sonnet-4-20250514-v1:0",
40-
"anthropic.claude-3-7-sonnet-20250219-v1:0",
41-
"anthropic.claude-3-5-sonnet-20241022-v2:0",
42-
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
43-
"us.anthropic.claude-3-7-sonnet-20250219-v1:0",
44-
"us.anthropic.claude-opus-4-1-20250805-v1:0",
45-
"us.anthropic.claude-opus-4-20250514-v1:0",
46-
"us.anthropic.claude-sonnet-4-20250514-v1:0",
47-
"eu.anthropic.claude-3-5-sonnet-20241022-v2:0",
48-
"eu.anthropic.claude-3-7-sonnet-20250219-v1:0",
49-
"eu.anthropic.claude-opus-4-1-20250805-v1:0",
50-
"eu.anthropic.claude-opus-4-20250514-v1:0",
51-
"eu.anthropic.claude-sonnet-4-20250514-v1:0",
52-
"snowflake/claude-3-5-sonnet",
53-
])
18+
// Computer use capability is now determined by image support
19+
// Any model that supports images can theoretically use browser tools
20+
// This approach is simpler and more inclusive than maintaining hardcoded lists

packages/types/src/providers/openrouter.ts

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,9 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
5151
"google/gemini-flash-1.5-8b",
5252
])
5353

54-
// https://www.anthropic.com/news/3-5-models-and-computer-use
55-
export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
56-
"anthropic/claude-3.5-sonnet",
57-
"anthropic/claude-3.5-sonnet:beta",
58-
"anthropic/claude-3.7-sonnet",
59-
"anthropic/claude-3.7-sonnet:beta",
60-
"anthropic/claude-3.7-sonnet:thinking",
61-
"anthropic/claude-sonnet-4",
62-
"anthropic/claude-opus-4",
63-
"anthropic/claude-opus-4.1",
64-
])
54+
// Computer use capability is now determined by image support
55+
// Any model that supports images can theoretically use browser tools
56+
// This approach is simpler and more inclusive than maintaining hardcoded lists
6557

6658
// When we first launched these models we didn't have support for
6759
// enabling/disabling the reasoning budget for hybrid models. Now that we

src/api/providers/fetchers/__tests__/litellm.spec.ts

Lines changed: 32 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -404,35 +404,29 @@ describe("getLiteLLMModels", () => {
404404
expect(result).toEqual({})
405405
})
406406

407-
it("uses fallback computer use detection when supports_computer_use is not available", async () => {
407+
it("uses image support as fallback for computer use when supports_computer_use is not available", async () => {
408408
const mockResponse = {
409409
data: {
410410
data: [
411411
{
412-
model_name: "claude-3-5-sonnet-latest",
412+
model_name: "model-with-vision",
413413
model_info: {
414414
max_tokens: 4096,
415415
max_input_tokens: 200000,
416416
supports_vision: true,
417417
supports_prompt_caching: false,
418418
// Note: no supports_computer_use field
419419
},
420-
litellm_params: {
421-
model: "anthropic/claude-3-5-sonnet-latest", // This should match the fallback list
422-
},
423420
},
424421
{
425-
model_name: "gpt-4-turbo",
422+
model_name: "model-without-vision",
426423
model_info: {
427424
max_tokens: 8192,
428425
max_input_tokens: 128000,
429426
supports_vision: false,
430427
supports_prompt_caching: false,
431428
// Note: no supports_computer_use field
432429
},
433-
litellm_params: {
434-
model: "openai/gpt-4-turbo", // This should NOT match the fallback list
435-
},
436430
},
437431
],
438432
},
@@ -442,71 +436,62 @@ describe("getLiteLLMModels", () => {
442436

443437
const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
444438

445-
expect(result["claude-3-5-sonnet-latest"]).toEqual({
439+
expect(result["model-with-vision"]).toEqual({
446440
maxTokens: 4096,
447441
contextWindow: 200000,
448442
supportsImages: true,
449-
supportsComputerUse: true, // Should be true due to fallback
443+
supportsComputerUse: true, // Should be true because supports_vision is true
450444
supportsPromptCache: false,
451445
inputPrice: undefined,
452446
outputPrice: undefined,
453-
description: "claude-3-5-sonnet-latest via LiteLLM proxy",
447+
description: "model-with-vision via LiteLLM proxy",
454448
})
455449

456-
expect(result["gpt-4-turbo"]).toEqual({
450+
expect(result["model-without-vision"]).toEqual({
457451
maxTokens: 8192,
458452
contextWindow: 128000,
459453
supportsImages: false,
460-
supportsComputerUse: false, // Should be false as it's not in fallback list
454+
supportsComputerUse: false, // Should be false because supports_vision is false
461455
supportsPromptCache: false,
462456
inputPrice: undefined,
463457
outputPrice: undefined,
464-
description: "gpt-4-turbo via LiteLLM proxy",
458+
description: "model-without-vision via LiteLLM proxy",
465459
})
466460
})
467461

468-
it("prioritizes explicit supports_computer_use over fallback detection", async () => {
462+
it("prioritizes explicit supports_computer_use over image-based fallback", async () => {
469463
const mockResponse = {
470464
data: {
471465
data: [
472466
{
473-
model_name: "claude-3-5-sonnet-latest",
467+
model_name: "model-with-vision-but-no-computer",
474468
model_info: {
475469
max_tokens: 4096,
476470
max_input_tokens: 200000,
477471
supports_vision: true,
478472
supports_prompt_caching: false,
479-
supports_computer_use: false, // Explicitly set to false
480-
},
481-
litellm_params: {
482-
model: "anthropic/claude-3-5-sonnet-latest", // This matches fallback list but should be ignored
473+
supports_computer_use: false, // Explicitly set to false despite vision support
483474
},
484475
},
485476
{
486-
model_name: "custom-model",
477+
model_name: "model-without-vision-but-computer",
487478
model_info: {
488479
max_tokens: 8192,
489480
max_input_tokens: 128000,
490481
supports_vision: false,
491482
supports_prompt_caching: false,
492-
supports_computer_use: true, // Explicitly set to true
493-
},
494-
litellm_params: {
495-
model: "custom/custom-model", // This would NOT match fallback list
483+
supports_computer_use: true, // Explicitly set to true despite no vision support
496484
},
497485
},
498486
{
499-
model_name: "another-custom-model",
487+
model_name: "model-with-both-false",
500488
model_info: {
501489
max_tokens: 8192,
502490
max_input_tokens: 128000,
503491
supports_vision: false,
504492
supports_prompt_caching: false,
505493
supports_computer_use: false, // Explicitly set to false
506494
},
507-
litellm_params: {
508-
model: "custom/another-custom-model", // This would NOT match fallback list
509-
},
510495
},
511496
],
512497
},
@@ -516,79 +501,70 @@ describe("getLiteLLMModels", () => {
516501

517502
const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
518503

519-
expect(result["claude-3-5-sonnet-latest"]).toEqual({
504+
expect(result["model-with-vision-but-no-computer"]).toEqual({
520505
maxTokens: 4096,
521506
contextWindow: 200000,
522507
supportsImages: true,
523-
supportsComputerUse: false, // False because explicitly set to false (fallback ignored)
508+
supportsComputerUse: false, // False because explicitly set to false (image fallback ignored)
524509
supportsPromptCache: false,
525510
inputPrice: undefined,
526511
outputPrice: undefined,
527-
description: "claude-3-5-sonnet-latest via LiteLLM proxy",
512+
description: "model-with-vision-but-no-computer via LiteLLM proxy",
528513
})
529514

530-
expect(result["custom-model"]).toEqual({
515+
expect(result["model-without-vision-but-computer"]).toEqual({
531516
maxTokens: 8192,
532517
contextWindow: 128000,
533518
supportsImages: false,
534519
supportsComputerUse: true, // True because explicitly set to true
535520
supportsPromptCache: false,
536521
inputPrice: undefined,
537522
outputPrice: undefined,
538-
description: "custom-model via LiteLLM proxy",
523+
description: "model-without-vision-but-computer via LiteLLM proxy",
539524
})
540525

541-
expect(result["another-custom-model"]).toEqual({
526+
expect(result["model-with-both-false"]).toEqual({
542527
maxTokens: 8192,
543528
contextWindow: 128000,
544529
supportsImages: false,
545530
supportsComputerUse: false, // False because explicitly set to false
546531
supportsPromptCache: false,
547532
inputPrice: undefined,
548533
outputPrice: undefined,
549-
description: "another-custom-model via LiteLLM proxy",
534+
description: "model-with-both-false via LiteLLM proxy",
550535
})
551536
})
552537

553-
it("handles fallback detection with various model name formats", async () => {
538+
it("handles image-based computer use detection for various models", async () => {
554539
const mockResponse = {
555540
data: {
556541
data: [
557542
{
558-
model_name: "vertex-claude",
543+
model_name: "vertex-model",
559544
model_info: {
560545
max_tokens: 4096,
561546
max_input_tokens: 200000,
562547
supports_vision: true,
563548
supports_prompt_caching: false,
564549
},
565-
litellm_params: {
566-
model: "vertex_ai/claude-3-5-sonnet", // Should match fallback list
567-
},
568550
},
569551
{
570-
model_name: "openrouter-claude",
552+
model_name: "openrouter-model",
571553
model_info: {
572554
max_tokens: 4096,
573555
max_input_tokens: 200000,
574556
supports_vision: true,
575557
supports_prompt_caching: false,
576558
},
577-
litellm_params: {
578-
model: "openrouter/anthropic/claude-3.5-sonnet", // Should match fallback list
579-
},
580559
},
581560
{
582-
model_name: "bedrock-claude",
561+
model_name: "bedrock-model",
583562
model_info: {
584563
max_tokens: 4096,
585564
max_input_tokens: 200000,
586-
supports_vision: true,
565+
supports_vision: false,
587566
supports_prompt_caching: false,
588567
},
589-
litellm_params: {
590-
model: "anthropic.claude-3-5-sonnet-20241022-v2:0", // Should match fallback list
591-
},
592568
},
593569
],
594570
},
@@ -598,8 +574,10 @@ describe("getLiteLLMModels", () => {
598574

599575
const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
600576

601-
expect(result["vertex-claude"].supportsComputerUse).toBe(true)
602-
expect(result["openrouter-claude"].supportsComputerUse).toBe(true)
603-
expect(result["bedrock-claude"].supportsComputerUse).toBe(true)
577+
// Models with vision support should have computer use enabled
578+
expect(result["vertex-model"].supportsComputerUse).toBe(true)
579+
expect(result["openrouter-model"].supportsComputerUse).toBe(true)
580+
// Model without vision support should not have computer use enabled
581+
expect(result["bedrock-model"].supportsComputerUse).toBe(false)
604582
})
605583
})

src/api/providers/fetchers/__tests__/openrouter.spec.ts

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import { back as nockBack } from "nock"
66

77
import {
88
OPEN_ROUTER_PROMPT_CACHING_MODELS,
9-
OPEN_ROUTER_COMPUTER_USE_MODELS,
109
OPEN_ROUTER_REASONING_BUDGET_MODELS,
1110
OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
1211
} from "@roo-code/types"
@@ -49,20 +48,20 @@ describe("OpenRouter API", () => {
4948

5049
expect(ourCachingModels.sort()).toEqual(expectedCachingModels)
5150

52-
const excludedComputerUseModels = new Set([
53-
"anthropic/claude-opus-4.1", // Not yet available in OpenRouter API
54-
])
51+
// Computer use is now determined by image support
52+
// Verify that models with image support have computer use enabled
53+
const modelsWithImages = Object.entries(models)
54+
.filter(([_, model]) => model.supportsImages)
55+
.map(([id, _]) => id)
5556

56-
const expectedComputerUseModels = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)
57-
.filter((id) => !excludedComputerUseModels.has(id))
58-
.sort()
57+
const modelsWithComputerUse = Object.entries(models)
58+
.filter(([_, model]) => model.supportsComputerUse)
59+
.map(([id, _]) => id)
5960

60-
expect(
61-
Object.entries(models)
62-
.filter(([_, model]) => model.supportsComputerUse)
63-
.map(([id, _]) => id)
64-
.sort(),
65-
).toEqual(expectedComputerUseModels)
61+
// All models with image support should have computer use enabled
62+
for (const modelId of modelsWithImages) {
63+
expect(modelsWithComputerUse).toContain(modelId)
64+
}
6665

6766
expect(
6867
Object.entries(models)
@@ -233,6 +232,7 @@ describe("OpenRouter API", () => {
233232
maxTokens: 65535,
234233
contextWindow: 1048576,
235234
supportsImages: true,
235+
supportsComputerUse: true, // Added because supportsImages is true
236236
supportsPromptCache: true,
237237
supportsReasoningBudget: true,
238238
inputPrice: 1.25,
@@ -247,6 +247,7 @@ describe("OpenRouter API", () => {
247247
maxTokens: 65536,
248248
contextWindow: 1048576,
249249
supportsImages: true,
250+
supportsComputerUse: true, // Added because supportsImages is true
250251
supportsPromptCache: true,
251252
supportsReasoningBudget: true,
252253
inputPrice: 1.25,

src/api/providers/fetchers/litellm.ts

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
import axios from "axios"
22

3-
import { LITELLM_COMPUTER_USE_MODELS } from "@roo-code/types"
4-
53
import type { ModelRecord } from "../../../shared/api"
64

75
import { DEFAULT_HEADERS } from "../constants"
@@ -33,33 +31,28 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
3331
const response = await axios.get(url, { headers, timeout: 5000 })
3432
const models: ModelRecord = {}
3533

36-
const computerModels = Array.from(LITELLM_COMPUTER_USE_MODELS)
37-
3834
// Process the model info from the response
3935
if (response.data && response.data.data && Array.isArray(response.data.data)) {
4036
for (const model of response.data.data) {
4137
const modelName = model.model_name
4238
const modelInfo = model.model_info
43-
const litellmModelName = model?.litellm_params?.model as string | undefined
4439

45-
if (!modelName || !modelInfo || !litellmModelName) continue
40+
if (!modelName || !modelInfo) continue
4641

47-
// Use explicit supports_computer_use if available, otherwise fall back to hardcoded list
42+
// Use explicit supports_computer_use if available, otherwise use image support
4843
let supportsComputerUse: boolean
4944
if (modelInfo.supports_computer_use !== undefined) {
5045
supportsComputerUse = Boolean(modelInfo.supports_computer_use)
5146
} else {
52-
// Fallback for older LiteLLM versions that don't have supports_computer_use field
53-
supportsComputerUse = computerModels.some((computer_model) =>
54-
litellmModelName.endsWith(computer_model),
55-
)
47+
// Browser automation requires screenshot analysis, which requires image/vision capabilities
48+
// Any model that can process images can theoretically use the browser tool
49+
supportsComputerUse = Boolean(modelInfo.supports_vision)
5650
}
5751

5852
models[modelName] = {
5953
maxTokens: modelInfo.max_tokens || 8192,
6054
contextWindow: modelInfo.max_input_tokens || 200000,
6155
supportsImages: Boolean(modelInfo.supports_vision),
62-
// litellm_params.model may have a prefix like openrouter/
6356
supportsComputerUse,
6457
supportsPromptCache: Boolean(modelInfo.supports_prompt_caching),
6558
inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined,

0 commit comments

Comments (0)