Skip to content

Commit 2c2580b

Browse files
committed
fix cua example, remove root model
1 parent de7d883 commit 2c2580b

File tree

7 files changed (+136 -45 lines changed)

examples/agent_example.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -36,11 +36,8 @@ async def main():
3636
# Build a unified configuration object for Stagehand
3737
config = StagehandConfig(
3838
env="BROWSERBASE",
39-
# env="LOCAL",
4039
api_key=os.getenv("BROWSERBASE_API_KEY"),
4140
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
42-
model_name="gpt-4o",
43-
self_heal=True,
4441
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
4542
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
4643
verbose=2,
@@ -51,12 +48,11 @@ async def main():
5148

5249
# Initialize - this creates a new session automatically.
5350
console.print("\n🚀 [info]Initializing Stagehand...[/]")
54-
await stagehand.init()
55-
if stagehand.env == "BROWSERBASE":
56-
console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}")
57-
console.print(
58-
f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]"
59-
)
51+
await stagehand.init()
52+
console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}")
53+
console.print(
54+
f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]"
55+
)
6056

6157
console.print("\n▶️ [highlight] Navigating[/] to Google")
6258
await stagehand.page.goto("https://google.com/")

examples/agent_example_local.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import asyncio
2+
import logging
3+
import os
4+
5+
from dotenv import load_dotenv
6+
from rich.console import Console
7+
from rich.panel import Panel
8+
from rich.theme import Theme
9+
10+
from stagehand import Stagehand, StagehandConfig, configure_logging
11+
12+
# Create a custom theme for consistent styling
13+
custom_theme = Theme(
14+
{
15+
"info": "cyan",
16+
"success": "green",
17+
"warning": "yellow",
18+
"error": "red bold",
19+
"highlight": "magenta",
20+
"url": "blue underline",
21+
}
22+
)
23+
24+
# Create a Rich console instance with our theme
25+
console = Console(theme=custom_theme)
26+
27+
load_dotenv()
28+
29+
# Configure logging with the utility function
30+
configure_logging(
31+
level=logging.INFO, # Set to INFO for regular logs, DEBUG for detailed
32+
quiet_dependencies=True, # Reduce noise from dependencies
33+
)
34+
35+
async def main():
36+
# Build a unified configuration object for Stagehand
37+
config = StagehandConfig(
38+
env="LOCAL",
39+
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
40+
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
41+
verbose=2,
42+
)
43+
44+
# Create a Stagehand client using the configuration object.
45+
stagehand = Stagehand(config)
46+
47+
# Initialize - this creates a new session automatically.
48+
console.print("\n🚀 [info]Initializing Stagehand...[/]")
49+
await stagehand.init()
50+
51+
console.print("\n▶️ [highlight] Navigating[/] to Google")
52+
await stagehand.page.goto("https://google.com/")
53+
console.print("✅ [success]Navigated to Google[/]")
54+
55+
console.print("\n▶️ [highlight] Using Agent to perform a task[/]: playing a game of 2048")
56+
agent = stagehand.agent(
57+
model="gemini-2.5-computer-use-preview-10-2025",
58+
instructions="You are a helpful web navigation assistant that helps users find information. You are currently on the following page: google.com. Do not ask follow up questions, the user will trust your judgement.",
59+
options={"apiKey": os.getenv("GEMINI_API_KEY")}
60+
)
61+
agent_result = await agent.execute(
62+
instruction="Play a game of 2048",
63+
max_steps=20,
64+
auto_screenshot=True,
65+
)
66+
67+
console.print(agent_result)
68+
69+
console.print("📊 [info]Agent execution result:[/]")
70+
console.print(f"🎯 Completed: [bold]{'Yes' if agent_result.completed else 'No'}[/]")
71+
if agent_result.message:
72+
console.print(f"💬 Message: [italic]{agent_result.message}[/]")
73+
74+
if agent_result.actions:
75+
console.print(f"🔄 Actions performed: [bold]{len(agent_result.actions)}[/]")
76+
for i, action in enumerate(agent_result.actions):
77+
action_type = action.type
78+
79+
console.print(f" Action {i+1}: {action_type if action_type else 'Unknown'}")
80+
81+
# For debugging, you can also print the full JSON
82+
console.print("[dim]Full response JSON:[/]")
83+
console.print_json(f"{agent_result.model_dump_json()}")
84+
85+
# Close the session
86+
console.print("\n⏹️ [warning]Closing session...[/]")
87+
await stagehand.close()
88+
console.print("✅ [success]Session closed successfully![/]")
89+
console.rule("[bold]End of Example[/]")
90+
91+
92+
if __name__ == "__main__":
93+
# Add a fancy header
94+
console.print(
95+
"\n",
96+
Panel(
97+
"[light_gray]Stagehand 🤘 Agent Example[/]",
98+
border_style="green",
99+
padding=(1, 10),
100+
),
101+
)
102+
asyncio.run(main())

stagehand/agent/agent.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,10 @@ async def execute(
170170
f"Agent execution finished. Success: {agent_result.completed}. Message: {agent_result.message}",
171171
category="agent",
172172
)
173-
# To clean up pydantic model output
174-
actions_repr = [action.root for action in agent_result.actions]
175173
self.logger.debug(
176-
f"Agent actions: {actions_repr}",
174+
f"Agent actions: {agent_result.actions}",
177175
category="agent",
178176
)
179-
agent_result.actions = actions_repr
180177
return agent_result
181178
else:
182179
agent_config_payload = self.config.model_dump(

stagehand/agent/google_cua.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
AgentResult,
2626
)
2727
from .client import AgentClient
28+
from pydantic import TypeAdapter
2829

2930
load_dotenv()
3031

@@ -176,7 +177,7 @@ def _process_provider_response(
176177
and candidate.safety_ratings
177178
):
178179
error_message += f" - Safety Ratings: {candidate.safety_ratings}"
179-
self.logger.warning(error_message, category="agent")
180+
self.logger.error(error_message, category="agent")
180181
return [], reasoning_text, True, error_message, []
181182

182183
if not function_call_parts:
@@ -260,7 +261,7 @@ def _process_provider_response(
260261
"keys": [self.key_to_playwright("PageDown")],
261262
}
262263
else:
263-
self.logger.warning(
264+
self.logger.error(
264265
f"Unsupported scroll direction: {direction}", category="agent"
265266
)
266267
return (
@@ -282,7 +283,7 @@ def _process_provider_response(
282283
elif direction in ("left", "right"):
283284
magnitude = self._normalize_coordinates(magnitude, 0)[0]
284285
else:
285-
self.logger.warning(
286+
self.logger.error(
286287
f"Unsupported scroll direction: {direction}", category="agent"
287288
)
288289
return (
@@ -352,7 +353,7 @@ def _process_provider_response(
352353
"arguments": {"url": "https://www.google.com"},
353354
}
354355
else:
355-
self.logger.warning(
356+
self.logger.error(
356357
f"Unsupported Gemini CUA function: {action_name}", category="agent"
357358
)
358359
return (
@@ -367,13 +368,11 @@ def _process_provider_response(
367368
try:
368369
# Directly construct the AgentActionType using the payload.
369370
# Pydantic will use the 'type' field in action_payload_dict to discriminate the Union.
370-
action_payload_for_agent_action_type = AgentActionType(
371-
**action_payload_dict
372-
)
371+
action_payload_for_agent_action_type = TypeAdapter(AgentActionType).validate_python(action_payload_dict)
373372

374373
agent_action = AgentAction(
375374
action_type=action_type_str, # This should match the 'type' in action_payload_dict
376-
action=action_payload_for_agent_action_type, # No RootModel wrapping if AgentActionType is the RootModel itself
375+
action=action_payload_for_agent_action_type,
377376
reasoning=reasoning_text,
378377
status="tool_code",
379378
)
@@ -598,7 +597,7 @@ async def run_task(
598597
)
599598

600599
if not agent_action and not task_completed:
601-
self.logger.warning(
600+
self.logger.debug(
602601
"Model did not request an action and task not marked complete. Ending task.",
603602
category="agent",
604603
)
@@ -614,7 +613,7 @@ async def run_task(
614613
usage=usage_obj,
615614
)
616615

617-
self.logger.warning("Max steps reached for Gemini CUA task.", category="agent")
616+
self.logger.debug("Max steps reached for Gemini CUA task.", category="agent")
618617
usage_obj = {
619618
"input_tokens": total_input_tokens,
620619
"output_tokens": total_output_tokens,

stagehand/agent/openai_cua.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from openai import (
88
OpenAI as OpenAISDK, # Renamed to avoid conflict with a potential class name
99
)
10-
from pydantic import BaseModel # Ensure BaseModel is imported for isinstance check
10+
from pydantic import BaseModel, TypeAdapter # Ensure BaseModel is imported for isinstance check
1111

1212
from ..handlers.cua_handler import CUAHandler
1313
from ..types.agent import (
@@ -175,8 +175,8 @@ def _process_provider_response(
175175
)
176176

177177
try:
178-
action_payload = AgentActionType(
179-
**computer_call_item.action.model_dump()
178+
action_payload = TypeAdapter(AgentActionType).validate_python(
179+
computer_call_item.action.model_dump()
180180
)
181181
agent_action = AgentAction(
182182
action_type=computer_call_item.action.type,
@@ -225,7 +225,7 @@ def _process_provider_response(
225225
function_action_payload = FunctionAction(type="function", name=function_call_item.name, arguments=arguments) # type: ignore
226226
agent_action = AgentAction(
227227
action_type="function", # Literal 'function'
228-
action=AgentActionType(root=function_action_payload),
228+
action=function_action_payload,
229229
reasoning=reasoning_text, # Reasoning applies to this action
230230
status=(
231231
function_call_item.status

stagehand/handlers/cua_handler.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,12 @@ async def get_screenshot_base64(self) -> str:
3535

3636
async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
3737
"""Execute a single action on the page."""
38+
specific_action_model = action.action
3839
self.logger.info(
39-
f"Performing action: {action.action.root if action.action else ''}",
40+
f"Performing action: {specific_action_model or ''}",
4041
category=StagehandFunctionName.AGENT,
4142
)
4243
action_type = action.action_type
43-
# action.action is the RootModel, action.action.root is the specific action model (e.g., ClickAction)
44-
specific_action_model = action.action.root if action.action else None
4544

4645
if not specific_action_model:
4746
self.logger.error(

stagehand/types/agent.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Any, Literal, Optional, Union
22

3-
from pydantic import BaseModel, RootModel
3+
from pydantic import BaseModel
44

55

66
class AgentConfig(BaseModel):
@@ -96,20 +96,18 @@ class KeyAction(BaseModel): # From Anthropic
9696
text: str
9797

9898

99-
AgentActionType = RootModel[
100-
Union[
101-
ClickAction,
102-
DoubleClickAction,
103-
TypeAction,
104-
KeyPressAction,
105-
ScrollAction,
106-
DragAction,
107-
MoveAction,
108-
WaitAction,
109-
ScreenshotAction,
110-
FunctionAction,
111-
KeyAction,
112-
]
99+
AgentActionType = Union[
100+
ClickAction,
101+
DoubleClickAction,
102+
TypeAction,
103+
KeyPressAction,
104+
ScrollAction,
105+
DragAction,
106+
MoveAction,
107+
WaitAction,
108+
ScreenshotAction,
109+
FunctionAction,
110+
KeyAction,
113111
]
114112

115113

0 commit comments

Comments
 (0)