|
4 | 4 | from rich.console import Console |
5 | 5 | from rich.panel import Panel |
6 | 6 | from rich.theme import Theme |
| 7 | +from pydantic import BaseModel, Field, HttpUrl |
7 | 8 | from dotenv import load_dotenv |
8 | 9 |
|
9 | | -from stagehand import Stagehand, StagehandConfig, configure_logging |
| 10 | +from stagehand import StagehandConfig, Stagehand, configure_logging |
| 11 | +from stagehand.schemas import ExtractOptions |
| 12 | +from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id |
10 | 13 |
|
11 | | -from browserbase.types import SessionCreateParams as BrowserbaseSessionCreateParams |
| 14 | +# Load environment variables |
| 15 | +load_dotenv() |
12 | 16 |
|
13 | | -# Configure logging with cleaner format |
14 | 17 | configure_logging( |
15 | 18 | level=logging.INFO, |
16 | 19 | remove_logger_name=True, # Remove the redundant stagehand.client prefix |
17 | 20 | quiet_dependencies=True, # Suppress httpx and other noisy logs |
18 | 21 | ) |
19 | 22 |
|
20 | | -# Create a custom theme for consistent styling |
21 | | -custom_theme = Theme( |
22 | | - { |
23 | | - "info": "cyan", |
24 | | - "success": "green", |
25 | | - "warning": "yellow", |
26 | | - "error": "red bold", |
27 | | - "highlight": "magenta", |
28 | | - "url": "blue underline", |
29 | | - } |
30 | | -) |
31 | | - |
32 | | -# Create a Rich console instance with our theme |
33 | | -console = Console(theme=custom_theme) |
34 | | - |
35 | | -load_dotenv() |
36 | | - |
37 | | -browserbase_session_create_params = BrowserbaseSessionCreateParams( |
38 | | - project_id=os.getenv("BROWSERBASE_PROJECT_ID"), |
39 | | - proxies=True, |
40 | | -) |
| 23 | +# Configure Rich console |
| 24 | +console = Console(theme=Theme({ |
| 25 | + "info": "cyan", |
| 26 | + "success": "green", |
| 27 | + "warning": "yellow", |
| 28 | + "error": "red bold", |
| 29 | + "highlight": "magenta", |
| 30 | + "url": "blue underline", |
| 31 | +})) |
| 32 | + |
| 33 | +# Define Pydantic models for testing |
| 34 | +class Company(BaseModel): |
| 35 | + name: str = Field(..., description="The name of the company") |
| 36 | + url: HttpUrl = Field(..., description="The URL of the company website or relevant page") |
| 37 | + |
| 38 | +class Companies(BaseModel): |
| 39 | + companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies") |
41 | 40 |
|
42 | | -console.print( |
43 | | - Panel.fit( |
44 | | - "[yellow]Logging Levels:[/]\n" |
45 | | - "[white]- Set [bold]verbose=0[/] for errors (ERROR)[/]\n" |
46 | | - "[white]- Set [bold]verbose=1[/] for minimal logs (INFO)[/]\n" |
47 | | - "[white]- Set [bold]verbose=2[/] for medium logs (WARNING)[/]\n" |
48 | | - "[white]- Set [bold]verbose=3[/] for detailed logs (DEBUG)[/]", |
49 | | - title="Verbosity Options", |
50 | | - border_style="blue", |
51 | | - ) |
52 | | -) |
| 41 | +class ElementAction(BaseModel): |
| 42 | + action: str |
| 43 | + id: int |
| 44 | + arguments: list[str] |
53 | 45 |
|
54 | 46 | async def main(): |
55 | | - # Build a unified configuration object for Stagehand |
| 47 | + |
| 48 | + # Create configuration |
56 | 49 | config = StagehandConfig( |
57 | | - env="BROWSERBASE", |
58 | 50 | api_key=os.getenv("BROWSERBASE_API_KEY"), |
59 | 51 | project_id=os.getenv("BROWSERBASE_PROJECT_ID"), |
60 | | - browserbase_session_create_params=browserbase_session_create_params, |
61 | | - headless=False, |
62 | | - dom_settle_timeout_ms=3000, |
63 | | - model_name="google/gemini-2.0-flash", |
64 | | - self_heal=True, |
65 | | - wait_for_captcha_solves=True, |
66 | | - system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", |
| 52 | + model_name="google/gemini-2.5-flash-preview-04-17", |
67 | 53 | model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, |
68 | | - # Use verbose=2 for medium-detail logs (1=minimal, 3=debug) |
69 | | - verbose=2, |
70 | | - ) |
71 | | - |
72 | | - stagehand = Stagehand(config) |
73 | | - |
74 | | - # Initialize - this creates a new session automatically. |
75 | | - console.print("\n🚀 [info]Initializing Stagehand...[/]") |
76 | | - await stagehand.init() |
77 | | - page = stagehand.page |
78 | | - console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}") |
79 | | - console.print( |
80 | | - f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]" |
| 54 | + verbose=1, |
81 | 55 | ) |
82 | | - |
83 | | - await asyncio.sleep(2) |
84 | | - |
85 | | - console.print("\n▶️ [highlight] Navigating[/] to Google") |
86 | | - await page.goto("https://google.com/") |
87 | | - console.print("✅ [success]Navigated to Google[/]") |
88 | | - |
89 | | - console.print("\n▶️ [highlight] Clicking[/] on About link") |
90 | | - # Click on the "About" link using Playwright |
91 | | - await page.get_by_role("link", name="About", exact=True).click() |
92 | | - console.print("✅ [success]Clicked on About link[/]") |
93 | | - |
94 | | - await asyncio.sleep(2) |
95 | | - console.print("\n▶️ [highlight] Navigating[/] back to Google") |
96 | | - await page.goto("https://google.com/") |
97 | | - console.print("✅ [success]Navigated back to Google[/]") |
98 | | - |
99 | | - console.print("\n▶️ [highlight] Performing action:[/] search for openai") |
100 | | - await page.act("search for openai") |
101 | | - await page.keyboard.press("Enter") |
102 | | - console.print("✅ [success]Performing Action:[/] Action completed successfully") |
103 | 56 |
|
104 | | - await asyncio.sleep(2) |
105 | | - |
106 | | - console.print("\n▶️ [highlight] Observing page[/] for news button") |
107 | | - observed = await page.observe("find the news button") |
| 57 | + # Initialize async client |
| 58 | + stagehand = Stagehand( |
| 59 | + config=config, |
| 60 | + env="BROWSERBASE", # LOCAL for local execution, BROWSERBASE for remote execution |
| 61 | + server_url=os.getenv("STAGEHAND_API_URL"), # only needed for remote execution |
| 62 | + ) |
108 | 63 |
|
109 | | - if len(observed) > 0: |
110 | | - element = observed[0] |
111 | | - console.print("✅ [success]Found element:[/] News button") |
112 | | - console.print("\n▶️ [highlight] Performing action on observed element:") |
113 | | - console.print(element) |
114 | | - await page.act(element) |
115 | | - console.print("✅ [success]Performing Action:[/] Action completed successfully") |
116 | | - |
117 | | - else: |
118 | | - console.print("❌ [error]No element found[/]") |
119 | | - |
120 | | - console.print("\n▶️ [highlight] Extracting[/] first search result") |
121 | | - data = await page.extract("extract the first result from the search") |
122 | | - console.print("📊 [info]Extracted data:[/]") |
123 | | - console.print_json(f"{data.model_dump_json()}") |
124 | | - |
125 | | - # Close the session |
126 | | - console.print("\n⏹️ [warning]Closing session...[/]") |
127 | | - await stagehand.close() |
128 | | - console.print("✅ [success]Session closed successfully![/]") |
129 | | - console.rule("[bold]End of Example[/]") |
130 | | - |
| 64 | + try: |
| 65 | + # Initialize the client |
| 66 | + await stagehand.init() |
| 67 | + console.print("[success]✓ Successfully initialized Stagehand async client[/]") |
| 68 | + console.print(f"[info]Environment: {stagehand.env}[/]") |
| 69 | + console.print(f"[info]LLM Client Available: {stagehand.llm is not None}[/]") |
| 70 | + |
| 71 | + # Navigate to AIgrant (as in the original test) |
| 72 | + await stagehand.page.goto("https://www.aigrant.com") |
| 73 | + console.print("[success]✓ Navigated to AIgrant[/]") |
| 74 | + await asyncio.sleep(2) |
| 75 | + |
| 76 | + # Get accessibility tree |
| 77 | + tree = await get_accessibility_tree(stagehand.page, stagehand.logger) |
| 78 | + console.print("[success]✓ Extracted accessibility tree[/]") |
| 79 | + |
| 80 | + print("ID to URL mapping:", tree.get("idToUrl")) |
| 81 | + print("IFrames:", tree.get("iframes")) |
| 82 | + |
| 83 | + # Click the "Get Started" button |
| 84 | + await stagehand.page.act("click the button with text 'Get Started'") |
| 85 | + console.print("[success]✓ Clicked 'Get Started' button[/]") |
| 86 | + |
| 87 | + # Observe the button |
| 88 | + await stagehand.page.observe("the button with text 'Get Started'") |
| 89 | + console.print("[success]✓ Observed 'Get Started' button[/]") |
| 90 | + |
| 91 | + # Extract companies using schema |
| 92 | + extract_options = ExtractOptions( |
| 93 | + instruction="Extract the names and URLs of up to 5 companies mentioned on this page", |
| 94 | + schema_definition=Companies |
| 95 | + ) |
| 96 | + |
| 97 | + extract_result = await stagehand.page.extract(extract_options) |
| 98 | + console.print("[success]✓ Extracted companies data[/]") |
| 99 | + |
| 100 | + # Display results |
| 101 | + print("Extract result:", extract_result) |
| 102 | + print("Extract result data:", extract_result.data if hasattr(extract_result, 'data') else 'No data field') |
| 103 | + |
| 104 | + # Parse the result into the Companies model |
| 105 | + companies_data = None |
| 106 | + |
| 107 | + # Both LOCAL and BROWSERBASE modes now return the Pydantic model directly |
| 108 | + try: |
| 109 | + companies_data = extract_result if isinstance(extract_result, Companies) else Companies.model_validate(extract_result) |
| 110 | + console.print("[success]✓ Successfully parsed extract result into Companies model[/]") |
| 111 | + |
| 112 | + # Handle URL resolution if needed |
| 113 | + if hasattr(companies_data, 'companies'): |
| 114 | + id_to_url = tree.get("idToUrl", {}) |
| 115 | + for company in companies_data.companies: |
| 116 | + if hasattr(company, 'url') and isinstance(company.url, str): |
| 117 | + # Check if URL is just an ID that needs to be resolved |
| 118 | + if company.url.isdigit() and company.url in id_to_url: |
| 119 | + company.url = id_to_url[company.url] |
| 120 | + console.print(f"[success]✓ Resolved URL for {company.name}: {company.url}[/]") |
| 121 | + |
| 122 | + except Exception as e: |
| 123 | + console.print(f"[error]Failed to parse extract result: {e}[/]") |
| 124 | + print("Raw extract result:", extract_result) |
| 125 | + |
| 126 | + print("\nExtracted Companies:") |
| 127 | + if companies_data and hasattr(companies_data, "companies"): |
| 128 | + for idx, company in enumerate(companies_data.companies, 1): |
| 129 | + print(f"{idx}. {company.name}: {company.url}") |
| 130 | + else: |
| 131 | + print("No companies were found in the extraction result") |
| 132 | + |
| 133 | + # XPath click |
| 134 | + await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click() |
| 135 | + await stagehand.page.wait_for_load_state('networkidle') |
| 136 | + console.print("[success]✓ Clicked element using XPath[/]") |
| 137 | + |
| 138 | + # Open a new page with Google |
| 139 | + console.print("\n[info]Creating a new page...[/]") |
| 140 | + new_page = await stagehand.context.new_page() |
| 141 | + await new_page.goto("https://www.google.com") |
| 142 | + console.print("[success]✓ Opened Google in a new page[/]") |
| 143 | + |
| 144 | + # Get accessibility tree for the new page |
| 145 | + tree = await get_accessibility_tree(new_page, stagehand.logger) |
| 146 | + console.print("[success]✓ Extracted accessibility tree for new page[/]") |
| 147 | + |
| 148 | + # Try clicking Get Started button on Google |
| 149 | + await new_page.act("click the button with text 'Get Started'") |
| 150 | + |
| 151 | + # Only use LLM directly if in LOCAL mode |
| 152 | + if stagehand.llm is not None: |
| 153 | + console.print("[info]LLM client available - using direct LLM call[/]") |
| 154 | + |
| 155 | + # Use LLM to analyze the page |
| 156 | + response = stagehand.llm.create_response( |
| 157 | + messages=[ |
| 158 | + { |
| 159 | + "role": "system", |
| 160 | + "content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions." |
| 161 | + }, |
| 162 | + { |
| 163 | + "role": "user", |
| 164 | + "content": [ |
| 165 | + { |
| 166 | + "type": "text", |
| 167 | + "text": f"fill the search bar with the text 'Hello'\nPage Tree:\n{tree.get('simplified')}" |
| 168 | + } |
| 169 | + ] |
| 170 | + } |
| 171 | + ], |
| 172 | + model=config.model_name, |
| 173 | + response_format=ElementAction, |
| 174 | + ) |
| 175 | + |
| 176 | + action = ElementAction.model_validate_json(response.choices[0].message.content) |
| 177 | + console.print(f"[success]✓ LLM identified element ID: {action.id}[/]") |
| 178 | + |
| 179 | + # Test CDP functionality |
| 180 | + args = {"backendNodeId": action.id} |
| 181 | + result = await new_page.send_cdp("DOM.resolveNode", args) |
| 182 | + object_info = result.get("object") |
| 183 | + print(object_info) |
| 184 | + |
| 185 | + xpath = await get_xpath_by_resolved_object_id(await new_page.get_cdp_client(), object_info["objectId"]) |
| 186 | + console.print(f"[success]✓ Retrieved XPath: {xpath}[/]") |
| 187 | + |
| 188 | + # Interact with the element |
| 189 | + if xpath: |
| 190 | + await new_page.locator(f"xpath={xpath}").click() |
| 191 | + await new_page.locator(f"xpath={xpath}").fill(action.arguments[0]) |
| 192 | + console.print("[success]✓ Filled search bar with 'Hello'[/]") |
| 193 | + else: |
| 194 | + print("No xpath found") |
| 195 | + else: |
| 196 | + console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]") |
| 197 | + # Alternative: use page.observe to find the search bar |
| 198 | + observe_result = await new_page.observe("the search bar or search input field") |
| 199 | + console.print(f"[info]Observed search elements: {observe_result}[/]") |
| 200 | + |
| 201 | + # Use page.act to fill the search bar |
| 202 | + try: |
| 203 | + await new_page.act("fill the search bar with 'Hello'") |
| 204 | + console.print("[success]✓ Filled search bar using act()[/]") |
| 205 | + except Exception as e: |
| 206 | + console.print(f"[warning]Could not fill search bar: {e}[/]") |
| 207 | + |
| 208 | + # Final test summary |
| 209 | + console.print("\n[success]All tests completed successfully![/]") |
| 210 | + |
| 211 | + except Exception as e: |
| 212 | + console.print(f"[error]Error during testing: {str(e)}[/]") |
| 213 | + import traceback |
| 214 | + traceback.print_exc() |
| 215 | + raise |
| 216 | + finally: |
| 217 | + # Close the client |
| 218 | + # wait for 5 seconds |
| 219 | + await asyncio.sleep(5) |
| 220 | + await stagehand.close() |
| 221 | + console.print("[info]Stagehand async client closed[/]") |
131 | 222 |
|
132 | 223 | if __name__ == "__main__": |
133 | 224 | # Add a fancy header |
|
0 commit comments