Skip to content

Commit dda52f1

Browse files
miguelg719tkattkatKylejeong2sameelarif
authored
Support for new Gemini Computer Use Models (#1110)
# why

Adding support for Gemini's new Computer Use model.

# what changed

We partnered with Google DeepMind to help integrate and test their new Computer Use models.

<img width="1238" height="655" alt="Screenshot 2025-10-07 at 1 14 44 PM" src="https://github.com/user-attachments/assets/af0d854a-8e55-4937-a071-10335497f686" />

The new model tag `gemini-2.5-computer-use-preview-10-2025` is available for Stagehand Agent. You can try it today with the example `cua-example.ts`.

To learn more, check out the blog post: [https://www.browserbase.com/blog/evaluating-browser-agents](https://www.browserbase.com/blog/evaluating-browser-agents)

---------

Co-authored-by: tkattkat <[email protected]>
Co-authored-by: Kylejeong2 <[email protected]>
Co-authored-by: Sameel <[email protected]>
1 parent 3ccf335 commit dda52f1

15 files changed

+2071
-871
lines changed

.changeset/wicked-ducks-share.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
Add support for new Gemini Computer Use models

examples/cua-example.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@ async function main() {
2626

2727
// Create a computer use agent
2828
const agent = stagehand.agent({
29-
provider: "openai",
30-
// For Anthropic, use claude-sonnet-4-20250514 or claude-3-7-sonnet-latest
31-
model: "computer-use-preview",
29+
provider: "google",
30+
// For Anthropic, use claude-sonnet-4-20250514 or claude-sonnet-4-5-20250929
31+
// For OpenAI, use computer-use-preview
32+
model: "gemini-2.5-computer-use-preview-10-2025",
3233
instructions: `You are a helpful assistant that can use a web browser.
3334
You are currently on the following page: ${page.url()}.
3435
Do not ask follow up questions, the user will trust your judgement.`,
3536
options: {
36-
apiKey: process.env.OPENAI_API_KEY,
37+
apiKey: process.env.GOOGLE_API_KEY,
3738
},
3839
});
3940

lib/agent/AgentProvider.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { ToolSet } from "ai/dist";
88
import { AgentClient } from "./AgentClient";
99
import { AnthropicCUAClient } from "./AnthropicCUAClient";
1010
import { OpenAICUAClient } from "./OpenAICUAClient";
11+
import { GoogleCUAClient } from "./GoogleCUAClient";
1112

1213
// Map model names to their provider types
1314
export const modelToAgentProviderMap: Record<string, AgentType> = {
@@ -16,6 +17,7 @@ export const modelToAgentProviderMap: Record<string, AgentType> = {
1617
"claude-3-7-sonnet-latest": "anthropic",
1718
"claude-sonnet-4-20250514": "anthropic",
1819
"claude-sonnet-4-5-20250929": "anthropic",
20+
"gemini-2.5-computer-use-preview-10-2025": "google",
1921
};
2022

2123
/**
@@ -64,9 +66,16 @@ export class AgentProvider {
6466
clientOptions,
6567
tools,
6668
);
69+
case "google":
70+
return new GoogleCUAClient(
71+
type,
72+
modelName,
73+
userProvidedInstructions,
74+
clientOptions,
75+
);
6776
default:
6877
throw new UnsupportedModelProviderError(
69-
["openai", "anthropic"],
78+
["openai", "anthropic", "google"],
7079
"Computer Use Agent",
7180
);
7281
}

lib/agent/AnthropicCUAClient.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ export class AnthropicCUAClient extends AgentClient {
2929
private baseURL?: string;
3030
private client: Anthropic;
3131
public lastMessageId?: string;
32-
private currentViewport = { width: 1024, height: 768 };
32+
private currentViewport = { width: 1288, height: 711 };
3333
private currentUrl?: string;
3434
private screenshotProvider?: () => Promise<string>;
3535
private actionHandler?: (action: AgentAction) => Promise<void>;
@@ -290,7 +290,7 @@ export class AnthropicCUAClient extends AgentClient {
290290

291291
logger({
292292
category: "agent",
293-
message: `Found text block: ${textBlock.text.substring(0, 50)}...`,
293+
message: `Found text block: ${textBlock.text}`,
294294
level: 2,
295295
});
296296
} else {

0 commit comments

Comments
 (0)