Skip to content

Commit f2e66d5

Browse files
authored
Merge pull request #63 from browserbase/miguel/stg-444-google-cua
support for google
2 parents 735750d + 1b41b53 commit f2e66d5

File tree

10 files changed

+947
-59
lines changed

10 files changed

+947
-59
lines changed

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ playwright>=1.42.1
66
requests>=2.31.0
77
rich
88
browserbase
9-
litellm==1.67.1
9+
litellm==1.67.1
10+
./temp/google_genai-1.14.0-py3-none-any.whl

stagehand/agent/agent.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
)
1010
from .anthropic_cua import AnthropicCUAClient
1111
from .client import AgentClient
12+
from .google_cua import GoogleCUAClient
1213
from .openai_cua import OpenAICUAClient
1314

1415
MODEL_TO_CLIENT_CLASS_MAP: dict[str, type[AgentClient]] = {
1516
"computer-use-preview": OpenAICUAClient,
16-
"claude-3-5-sonnet-20240620": AnthropicCUAClient,
17-
"claude-3-7-sonnet-20250219": AnthropicCUAClient,
17+
"claude-3-5-sonnet-latest": AnthropicCUAClient,
18+
"claude-3-7-sonnet-latest": AnthropicCUAClient,
19+
"models/computer-use-exp": GoogleCUAClient,
1820
}
1921

2022
AGENT_METRIC_FUNCTION_NAME = "AGENT_EXECUTE_TASK"
@@ -39,6 +41,8 @@ def __init__(self, stagehand_client, **kwargs):
3941
stagehand=self.stagehand, page=self.stagehand.page._page, logger=self.logger
4042
)
4143

44+
self.viewport = self.stagehand.page._page.viewport_size
45+
# self.viewport = {"width": 1024, "height": 768}
4246
self.client: AgentClient = self._get_client()
4347

4448
def _get_client(self) -> AgentClient:
@@ -61,6 +65,7 @@ def _get_client(self) -> AgentClient:
6165
config=self.config,
6266
logger=self.logger,
6367
handler=self.cua_handler,
68+
viewport=self.viewport,
6469
)
6570

6671
async def execute(

stagehand/agent/anthropic_cua.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,33 @@ def __init__(
5050
config: Optional[AgentConfig] = None,
5151
logger: Optional[Any] = None,
5252
handler: Optional[CUAHandler] = None,
53+
viewport: Optional[dict[str, int]] = None,
5354
**kwargs,
5455
):
5556
super().__init__(model, instructions, config, logger, handler)
5657
self.anthropic_sdk_client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
5758

58-
dimensions = [1024, 768] # Default dimensions
59+
dimensions = (
60+
(viewport["width"], viewport["height"]) if viewport else (1024, 768)
61+
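) # Viewport size when provided, otherwise the 1024x768 default. NOTE(review): this is now a tuple, so the dimensions[0] / dimensions[1] assignments below raise TypeError when config sets display_width / display_height; the previous code used a list.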
5962
if self.config:
6063
if hasattr(self.config, "display_width") and self.config.display_width is not None: # type: ignore
6164
dimensions[0] = self.config.display_width # type: ignore
6265
if hasattr(self.config, "display_height") and self.config.display_height is not None: # type: ignore
6366
dimensions[1] = self.config.display_height # type: ignore
64-
67+
computer_tool_type = (
68+
"computer_20250124"
69+
if model == "claude-3-7-sonnet-latest"
70+
else "computer_20241022"
71+
)
72+
self.beta_flag = (
73+
["computer-use-2025-01-24"]
74+
if model == "claude-3-7-sonnet-latest"
75+
else ["computer-use-2024-10-22"]
76+
)
6577
self.tools = [
6678
{
67-
"type": "computer_20250124",
79+
"type": computer_tool_type,
6880
"name": "computer",
6981
"display_width_px": dimensions[0],
7082
"display_height_px": dimensions[1],
@@ -155,7 +167,7 @@ async def run_task(
155167
+ "Remember to call the computer tools, and only goto or navigate_back if you need to. Screenshots, clicks, etc, will be parsed from computer tool calls", # System prompt
156168
messages=current_messages,
157169
tools=self.tools,
158-
betas=["computer-use-2025-01-24"],
170+
betas=self.beta_flag,
159171
)
160172
end_time = asyncio.get_event_loop().time()
161173
total_inference_time_ms += int((end_time - start_time) * 1000)

0 commit comments

Comments
 (0)