Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 3 additions & 36 deletions packages/types/src/providers/lite-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,6 @@ export const litellmDefaultModelInfo: ModelInfo = {
cacheReadsPrice: 0.3,
}

/**
 * Model identifiers used by the list-based fallback for LiteLLM computer-use detection.
 *
 * When a LiteLLM model entry carries no `supports_computer_use` field, the fetcher's
 * list-based fallback checks whether `litellm_params.model` ends with one of these
 * strings, so each entry is matched as a suffix of the configured model id.
 */
export const LITELLM_COMPUTER_USE_MODELS = new Set([
// Unprefixed Claude model ids
"claude-3-5-sonnet-latest",
"claude-opus-4-1-20250805",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514",
"claude-3-7-sonnet-latest",
"claude-3-7-sonnet-20250219",
"claude-3-5-sonnet-20241022",
// `vertex_ai/`-prefixed ids
"vertex_ai/claude-3-5-sonnet",
"vertex_ai/claude-3-5-sonnet-v2",
"vertex_ai/claude-3-5-sonnet-v2@20241022",
"vertex_ai/claude-3-7-sonnet@20250219",
"vertex_ai/claude-opus-4-1@20250805",
"vertex_ai/claude-opus-4@20250514",
"vertex_ai/claude-sonnet-4@20250514",
// `openrouter/`-prefixed ids
"openrouter/anthropic/claude-3.5-sonnet",
"openrouter/anthropic/claude-3.5-sonnet:beta",
"openrouter/anthropic/claude-3.7-sonnet",
"openrouter/anthropic/claude-3.7-sonnet:beta",
// `anthropic.`-prefixed ids
"anthropic.claude-opus-4-1-20250805-v1:0",
"anthropic.claude-opus-4-20250514-v1:0",
"anthropic.claude-sonnet-4-20250514-v1:0",
"anthropic.claude-3-7-sonnet-20250219-v1:0",
"anthropic.claude-3-5-sonnet-20241022-v2:0",
// `us.anthropic.`-prefixed ids
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
"us.anthropic.claude-3-7-sonnet-20250219-v1:0",
"us.anthropic.claude-opus-4-1-20250805-v1:0",
"us.anthropic.claude-opus-4-20250514-v1:0",
"us.anthropic.claude-sonnet-4-20250514-v1:0",
// `eu.anthropic.`-prefixed ids
"eu.anthropic.claude-3-5-sonnet-20241022-v2:0",
"eu.anthropic.claude-3-7-sonnet-20250219-v1:0",
"eu.anthropic.claude-opus-4-1-20250805-v1:0",
"eu.anthropic.claude-opus-4-20250514-v1:0",
"eu.anthropic.claude-sonnet-4-20250514-v1:0",
// `snowflake/`-prefixed id
"snowflake/claude-3-5-sonnet",
])
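// For LiteLLM, computer use capability is determined by image support (supports_vision) unless model_info provides an explicit supports_computer_use value, which takes precedence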
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These comments are identical in both files. Could we make them slightly more specific to each context? For example, here we could mention that LiteLLM can override this with an explicit supports_computer_use field.

// Any model that supports images can theoretically use browser tools
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this approach perhaps too permissive? We're enabling browser use for ALL models with image support, including models that may not have been designed or tested for browser automation (e.g., image generation models, basic vision models). Could we consider adding a denylist for known incompatible models or requiring models to opt-in rather than being automatically enabled?

// This approach is simpler and more inclusive than maintaining hardcoded lists
14 changes: 3 additions & 11 deletions packages/types/src/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,9 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
"google/gemini-flash-1.5-8b",
])

/**
 * OpenRouter model ids on the computer-use allowlist.
 *
 * The OpenRouter fetcher spec compares the ids of fetched models flagged with
 * `supportsComputerUse` against this set.
 *
 * @see https://www.anthropic.com/news/3-5-models-and-computer-use
 */
// https://www.anthropic.com/news/3-5-models-and-computer-use
export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
"anthropic/claude-3.5-sonnet",
"anthropic/claude-3.5-sonnet:beta",
"anthropic/claude-3.7-sonnet",
"anthropic/claude-3.7-sonnet:beta",
"anthropic/claude-3.7-sonnet:thinking",
"anthropic/claude-sonnet-4",
"anthropic/claude-opus-4",
"anthropic/claude-opus-4.1",
])
// For OpenRouter, computer use capability is determined by image support:
// any model that supports images is assumed to be able to use the browser tools.
// This is simpler and more inclusive than maintaining a hardcoded list of model ids.

// When we first launched these models we didn't have support for
// enabling/disabling the reasoning budget for hybrid models. Now that we
Expand Down
86 changes: 32 additions & 54 deletions src/api/providers/fetchers/__tests__/litellm.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -404,35 +404,29 @@ describe("getLiteLLMModels", () => {
expect(result).toEqual({})
})

it("uses fallback computer use detection when supports_computer_use is not available", async () => {
it("uses image support as fallback for computer use when supports_computer_use is not available", async () => {
const mockResponse = {
data: {
data: [
{
model_name: "claude-3-5-sonnet-latest",
model_name: "model-with-vision",
model_info: {
max_tokens: 4096,
max_input_tokens: 200000,
supports_vision: true,
supports_prompt_caching: false,
// Note: no supports_computer_use field
},
litellm_params: {
model: "anthropic/claude-3-5-sonnet-latest", // This should match the fallback list
},
},
{
model_name: "gpt-4-turbo",
model_name: "model-without-vision",
model_info: {
max_tokens: 8192,
max_input_tokens: 128000,
supports_vision: false,
supports_prompt_caching: false,
// Note: no supports_computer_use field
},
litellm_params: {
model: "openai/gpt-4-turbo", // This should NOT match the fallback list
},
},
],
},
Expand All @@ -442,71 +436,62 @@ describe("getLiteLLMModels", () => {

const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")

expect(result["claude-3-5-sonnet-latest"]).toEqual({
expect(result["model-with-vision"]).toEqual({
maxTokens: 4096,
contextWindow: 200000,
supportsImages: true,
supportsComputerUse: true, // Should be true due to fallback
supportsComputerUse: true, // Should be true because supports_vision is true
supportsPromptCache: false,
inputPrice: undefined,
outputPrice: undefined,
description: "claude-3-5-sonnet-latest via LiteLLM proxy",
description: "model-with-vision via LiteLLM proxy",
})

expect(result["gpt-4-turbo"]).toEqual({
expect(result["model-without-vision"]).toEqual({
maxTokens: 8192,
contextWindow: 128000,
supportsImages: false,
supportsComputerUse: false, // Should be false as it's not in fallback list
supportsComputerUse: false, // Should be false because supports_vision is false
supportsPromptCache: false,
inputPrice: undefined,
outputPrice: undefined,
description: "gpt-4-turbo via LiteLLM proxy",
description: "model-without-vision via LiteLLM proxy",
})
})

it("prioritizes explicit supports_computer_use over fallback detection", async () => {
it("prioritizes explicit supports_computer_use over image-based fallback", async () => {
const mockResponse = {
data: {
data: [
{
model_name: "claude-3-5-sonnet-latest",
model_name: "model-with-vision-but-no-computer",
model_info: {
max_tokens: 4096,
max_input_tokens: 200000,
supports_vision: true,
supports_prompt_caching: false,
supports_computer_use: false, // Explicitly set to false
},
litellm_params: {
model: "anthropic/claude-3-5-sonnet-latest", // This matches fallback list but should be ignored
supports_computer_use: false, // Explicitly set to false despite vision support
},
},
{
model_name: "custom-model",
model_name: "model-without-vision-but-computer",
model_info: {
max_tokens: 8192,
max_input_tokens: 128000,
supports_vision: false,
supports_prompt_caching: false,
supports_computer_use: true, // Explicitly set to true
},
litellm_params: {
model: "custom/custom-model", // This would NOT match fallback list
supports_computer_use: true, // Explicitly set to true despite no vision support
},
},
{
model_name: "another-custom-model",
model_name: "model-with-both-false",
model_info: {
max_tokens: 8192,
max_input_tokens: 128000,
supports_vision: false,
supports_prompt_caching: false,
supports_computer_use: false, // Explicitly set to false
},
litellm_params: {
model: "custom/another-custom-model", // This would NOT match fallback list
},
},
],
},
Expand All @@ -516,79 +501,70 @@ describe("getLiteLLMModels", () => {

const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")

expect(result["claude-3-5-sonnet-latest"]).toEqual({
expect(result["model-with-vision-but-no-computer"]).toEqual({
maxTokens: 4096,
contextWindow: 200000,
supportsImages: true,
supportsComputerUse: false, // False because explicitly set to false (fallback ignored)
supportsComputerUse: false, // False because explicitly set to false (image fallback ignored)
supportsPromptCache: false,
inputPrice: undefined,
outputPrice: undefined,
description: "claude-3-5-sonnet-latest via LiteLLM proxy",
description: "model-with-vision-but-no-computer via LiteLLM proxy",
})

expect(result["custom-model"]).toEqual({
expect(result["model-without-vision-but-computer"]).toEqual({
maxTokens: 8192,
contextWindow: 128000,
supportsImages: false,
supportsComputerUse: true, // True because explicitly set to true
supportsPromptCache: false,
inputPrice: undefined,
outputPrice: undefined,
description: "custom-model via LiteLLM proxy",
description: "model-without-vision-but-computer via LiteLLM proxy",
})

expect(result["another-custom-model"]).toEqual({
expect(result["model-with-both-false"]).toEqual({
maxTokens: 8192,
contextWindow: 128000,
supportsImages: false,
supportsComputerUse: false, // False because explicitly set to false
supportsPromptCache: false,
inputPrice: undefined,
outputPrice: undefined,
description: "another-custom-model via LiteLLM proxy",
description: "model-with-both-false via LiteLLM proxy",
})
})

it("handles fallback detection with various model name formats", async () => {
it("handles image-based computer use detection for various models", async () => {
const mockResponse = {
data: {
data: [
{
model_name: "vertex-claude",
model_name: "vertex-model",
model_info: {
max_tokens: 4096,
max_input_tokens: 200000,
supports_vision: true,
supports_prompt_caching: false,
},
litellm_params: {
model: "vertex_ai/claude-3-5-sonnet", // Should match fallback list
},
},
{
model_name: "openrouter-claude",
model_name: "openrouter-model",
model_info: {
max_tokens: 4096,
max_input_tokens: 200000,
supports_vision: true,
supports_prompt_caching: false,
},
litellm_params: {
model: "openrouter/anthropic/claude-3.5-sonnet", // Should match fallback list
},
},
{
model_name: "bedrock-claude",
model_name: "bedrock-model",
model_info: {
max_tokens: 4096,
max_input_tokens: 200000,
supports_vision: true,
supports_vision: false,
supports_prompt_caching: false,
},
litellm_params: {
model: "anthropic.claude-3-5-sonnet-20241022-v2:0", // Should match fallback list
},
},
],
},
Expand All @@ -598,8 +574,10 @@ describe("getLiteLLMModels", () => {

const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")

expect(result["vertex-claude"].supportsComputerUse).toBe(true)
expect(result["openrouter-claude"].supportsComputerUse).toBe(true)
expect(result["bedrock-claude"].supportsComputerUse).toBe(true)
// Models with vision support should have computer use enabled
expect(result["vertex-model"].supportsComputerUse).toBe(true)
expect(result["openrouter-model"].supportsComputerUse).toBe(true)
// Model without vision support should not have computer use enabled
expect(result["bedrock-model"].supportsComputerUse).toBe(false)
})
})
27 changes: 14 additions & 13 deletions src/api/providers/fetchers/__tests__/openrouter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { back as nockBack } from "nock"

import {
OPEN_ROUTER_PROMPT_CACHING_MODELS,
OPEN_ROUTER_COMPUTER_USE_MODELS,
OPEN_ROUTER_REASONING_BUDGET_MODELS,
OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
} from "@roo-code/types"
Expand Down Expand Up @@ -49,20 +48,20 @@ describe("OpenRouter API", () => {

expect(ourCachingModels.sort()).toEqual(expectedCachingModels)

const excludedComputerUseModels = new Set([
"anthropic/claude-opus-4.1", // Not yet available in OpenRouter API
])
// Computer use is now determined by image support
// Verify that models with image support have computer use enabled
const modelsWithImages = Object.entries(models)
.filter(([_, model]) => model.supportsImages)
.map(([id, _]) => id)

const expectedComputerUseModels = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)
.filter((id) => !excludedComputerUseModels.has(id))
.sort()
const modelsWithComputerUse = Object.entries(models)
.filter(([_, model]) => model.supportsComputerUse)
.map(([id, _]) => id)

expect(
Object.entries(models)
.filter(([_, model]) => model.supportsComputerUse)
.map(([id, _]) => id)
.sort(),
).toEqual(expectedComputerUseModels)
// All models with image support should have computer use enabled
for (const modelId of modelsWithImages) {
expect(modelsWithComputerUse).toContain(modelId)
}

expect(
Object.entries(models)
Expand Down Expand Up @@ -233,6 +232,7 @@ describe("OpenRouter API", () => {
maxTokens: 65535,
contextWindow: 1048576,
supportsImages: true,
supportsComputerUse: true, // Added because supportsImages is true
supportsPromptCache: true,
supportsReasoningBudget: true,
inputPrice: 1.25,
Expand All @@ -247,6 +247,7 @@ describe("OpenRouter API", () => {
maxTokens: 65536,
contextWindow: 1048576,
supportsImages: true,
supportsComputerUse: true, // Added because supportsImages is true
supportsPromptCache: true,
supportsReasoningBudget: true,
inputPrice: 1.25,
Expand Down
17 changes: 5 additions & 12 deletions src/api/providers/fetchers/litellm.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import axios from "axios"

import { LITELLM_COMPUTER_USE_MODELS } from "@roo-code/types"

import type { ModelRecord } from "../../../shared/api"

import { DEFAULT_HEADERS } from "../constants"
Expand Down Expand Up @@ -33,33 +31,28 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
const response = await axios.get(url, { headers, timeout: 5000 })
const models: ModelRecord = {}

const computerModels = Array.from(LITELLM_COMPUTER_USE_MODELS)

// Process the model info from the response
if (response.data && response.data.data && Array.isArray(response.data.data)) {
for (const model of response.data.data) {
const modelName = model.model_name
const modelInfo = model.model_info
const litellmModelName = model?.litellm_params?.model as string | undefined

if (!modelName || !modelInfo || !litellmModelName) continue
if (!modelName || !modelInfo) continue

// Use explicit supports_computer_use if available, otherwise fall back to hardcoded list
// Use explicit supports_computer_use if available, otherwise use image support
let supportsComputerUse: boolean
if (modelInfo.supports_computer_use !== undefined) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good implementation of the fallback logic. The explicit supports_computer_use field takes precedence, which maintains backward compatibility while adopting the new image-based approach.

supportsComputerUse = Boolean(modelInfo.supports_computer_use)
} else {
// Fallback for older LiteLLM versions that don't have supports_computer_use field
supportsComputerUse = computerModels.some((computer_model) =>
litellmModelName.endsWith(computer_model),
)
// Browser automation requires screenshot analysis, which requires image/vision capabilities
// Any model that can process images can theoretically use the browser tool
supportsComputerUse = Boolean(modelInfo.supports_vision)
}

models[modelName] = {
maxTokens: modelInfo.max_tokens || 8192,
contextWindow: modelInfo.max_input_tokens || 200000,
supportsImages: Boolean(modelInfo.supports_vision),
// litellm_params.model may have a prefix like openrouter/
supportsComputerUse,
supportsPromptCache: Boolean(modelInfo.supports_prompt_caching),
inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined,
Expand Down
Loading
Loading