1
+ import asyncio
2
+ import logging
3
+ import os
4
+ from rich .console import Console
5
+ from rich .panel import Panel
6
+ from rich .theme import Theme
7
+ from pydantic import BaseModel , Field , HttpUrl
8
+ from dotenv import load_dotenv
9
+ import time
10
+
11
+ from stagehand import StagehandConfig , Stagehand
12
+ from stagehand .utils import configure_logging
13
+ from stagehand .schemas import ObserveOptions , ActOptions , ExtractOptions
14
+ from stagehand .a11y .utils import get_accessibility_tree , get_xpath_by_resolved_object_id
15
+
16
# Pull settings from a local .env file into the process environment before
# anything below reads them via os.getenv().
load_dotenv()

# Named styles used by the "[info]...", "[success]..." markup tags that the
# rest of this script passes to console.print().
_CONSOLE_THEME = Theme(
    {
        "info": "cyan",
        "success": "green",
        "warning": "yellow",
        "error": "red bold",
        "highlight": "magenta",
        "url": "blue underline",
    }
)

# Shared Rich console for all styled output in this script.
console = Console(theme=_CONSOLE_THEME)
28
+
29
+ # Define Pydantic models for testing
30
class Company(BaseModel):
    # One company entry extracted from a page: a display name plus a URL.
    # Documented with comments rather than a docstring on purpose: pydantic
    # copies a model's docstring into the generated JSON schema, which would
    # change the schema handed to the extractor.

    # Both fields are required (no default is supplied).
    name: str = Field(description="The name of the company")
    url: HttpUrl = Field(
        description="The URL of the company website or relevant page",
    )
33
+
34
class Companies(BaseModel):
    # Container schema passed to page.extract(): a required list of Company
    # entries. Documented with comments rather than a docstring because
    # pydantic copies a model's docstring into the generated JSON schema.

    # NOTE(review): the "maximum of 5" limit exists only in the description
    # text; nothing in this model enforces it.
    companies: list[Company] = Field(
        description="List of companies extracted from the page, maximum of 5 companies",
    )
36
+
37
class ElementAction(BaseModel):
    # Structured answer requested from the LLM (used as `response_format`).
    # Field names are part of that schema and must not be renamed, which is
    # why `id` is kept even though it shadows the builtin inside the class.
    # Comments are used instead of a docstring because pydantic copies a
    # model's docstring into the generated JSON schema.

    # Name of the action to perform; not read by the visible code.
    action: str
    # Element identifier; main() passes it to CDP as `backendNodeId`.
    id: int
    # Action arguments; main() uses the first one as the text to fill in.
    arguments: list[str]
41
+
42
def _markup_safe(value):
    """Return ``str(value)`` with Rich console-markup escaped.

    Dynamic text (exception messages, XPath selectors such as ``[@id='q']``,
    extracted data) may contain bracketed fragments that Rich would parse as
    style tags, silently dropping them or raising ``MarkupError``.
    """
    # Local import keeps this block self-contained; rich is already a
    # dependency of this file.
    from rich.markup import escape

    return escape(str(value))


def _write_tree_snapshot(tree, path="../tree.txt"):
    """Write the tree's "simplified" text to *path*, overwriting the file."""
    simplified = tree.get("simplified")
    if simplified is None:
        console.print("[warning]Accessibility tree has no 'simplified' text[/]")
        simplified = ""
    # Explicit UTF-8: page text is arbitrary Unicode and the platform default
    # encoding may not be able to represent it.
    with open(path, "w", encoding="utf-8") as f:
        f.write(simplified)


def _resolve_company_urls(raw_data, id_to_url):
    """Replace numeric-ID ``url`` values in *raw_data* using *id_to_url*.

    Mutates ``raw_data["companies"]`` in place. Does nothing unless
    *raw_data* is a dict with a ``"companies"`` key.
    """
    if not (isinstance(raw_data, dict) and "companies" in raw_data):
        return
    for company in raw_data["companies"]:
        if not isinstance(company, dict):
            continue
        url = company.get("url")
        # A URL that is only digits is treated as an element ID that still
        # needs to be mapped to the real address.
        # NOTE(review): assumes idToUrl is keyed by strings - confirm.
        if isinstance(url, str) and url.isdigit() and url in id_to_url:
            company["url"] = id_to_url[url]
            console.print(
                f"[success]✓ Resolved URL for {_markup_safe(company.get('name'))} : "
                f"{_markup_safe(company['url'])} [/]"
            )


def _parse_companies(extract_result, id_to_url):
    """Validate *extract_result* into a ``Companies`` model.

    Handles both result shapes the script distinguishes: a truthy ``data``
    attribute (BROWSERBASE mode) or a direct ``companies`` attribute (LOCAL
    mode). Returns the validated model, or ``None`` when neither shape is
    present or validation fails (the failure is printed, not raised).
    """
    raw_data = getattr(extract_result, "data", None)
    if raw_data:
        # BROWSERBASE mode - data is in the 'data' field
        try:
            console.print(f"[info]Raw extract data: {_markup_safe(raw_data)} [/]")
            _resolve_company_urls(raw_data, id_to_url)
            parsed = Companies.model_validate(raw_data)
            console.print("[success]✓ Successfully parsed extract result into Companies model[/]")
            return parsed
        except Exception as e:  # best-effort: report and carry on with the run
            console.print(f"[error]Failed to parse extract result: {_markup_safe(e)} [/]")
            print("Raw data:", raw_data)
            return None

    if hasattr(extract_result, "companies"):
        # LOCAL mode - companies field is directly available
        try:
            parsed = Companies.model_validate(extract_result.model_dump())
            console.print("[success]✓ Successfully parsed extract result into Companies model[/]")
            return parsed
        except Exception as e:  # best-effort: report and carry on with the run
            console.print(f"[error]Failed to parse extract result: {_markup_safe(e)} [/]")
            print("Raw companies data:", extract_result.companies)

    return None


async def _fill_search_with_llm(stagehand, page, tree):
    """Ask the LLM for the search element, resolve it over CDP, and fill it.

    Raises:
        RuntimeError: if ``DOM.resolveNode`` returns no usable object.
        ValueError: if the LLM response carries no fill argument.
    """
    console.print("[info]LLM client available - using direct LLM call[/]")

    # Use LLM to analyze the page
    # NOTE(review): create_response is called without await, as in the
    # original - confirm it is synchronous in the installed stagehand version.
    response = stagehand.llm.create_response(
        messages=[
            {
                "role": "system",
                "content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"fill the search bar with the text 'Hello'\n Page Tree:\n {tree.get('simplified')} ",
                    }
                ],
            },
        ],
        model="gemini/gemini-2.5-flash-preview-04-17",
        response_format=ElementAction,
    )

    action = ElementAction.model_validate_json(response.choices[0].message.content)
    console.print(f"[success]✓ LLM identified element ID: {action.id} [/]")

    # Test CDP functionality
    result = await page.send_cdp("DOM.resolveNode", {"backendNodeId": action.id})
    object_info = result.get("object")
    print(object_info)
    if not object_info or "objectId" not in object_info:
        raise RuntimeError(
            f"DOM.resolveNode returned no object for backendNodeId {action.id}"
        )

    xpath = await get_xpath_by_resolved_object_id(
        await page.get_cdp_client(), object_info["objectId"]
    )
    console.print(f"[success]✓ Retrieved XPath: {_markup_safe(xpath)} [/]")

    # Interact with the element
    if not xpath:
        print("No xpath found")
        return
    if not action.arguments:
        raise ValueError("LLM response contained no argument to fill into the element")
    target = page.locator(f"xpath={xpath} ")
    await target.click()
    await target.fill(action.arguments[0])
    console.print("[success]✓ Filled search bar with 'Hello'[/]")


async def _fill_search_with_act(page):
    """Fallback when no LLM client is available: use observe() and act()."""
    console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]")
    # Alternative: use page.observe to find the search bar
    observe_result = await page.observe("the search bar or search input field")
    console.print(f"[info]Observed search elements: {_markup_safe(observe_result)} [/]")

    # Use page.act to fill the search bar
    try:
        await page.act("fill the search bar with 'Hello'")
        console.print("[success]✓ Filled search bar using act()[/]")
    except Exception as e:  # deliberately non-fatal, as in the original
        console.print(f"[warning]Could not fill search bar: {_markup_safe(e)} [/]")


async def main():
    """Run the Stagehand async smoke test end to end.

    Steps, in order: initialise the client from environment variables, open
    aigrant.com, dump its accessibility tree to ``../tree.txt``, exercise
    ``act`` / ``observe`` / ``extract``, click an element by XPath, open
    Google in a second page, and fill its search bar either through a direct
    LLM call plus CDP (when ``stagehand.llm`` is set) or through
    ``observe`` / ``act``.

    Any exception is printed with its traceback and re-raised. The client is
    always closed afterwards, following a 5 second pause.
    """
    # Display header
    console.print(
        "\n ",
        Panel.fit(
            "[light_gray]New Stagehand 🤘 Python Async Test[/]",
            border_style="green",
            padding=(1, 10),
        ),
    )

    # Create configuration
    config = StagehandConfig(
        api_key=os.getenv("BROWSERBASE_API_KEY"),
        project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
        model_name="google/gemini-2.5-flash-preview-04-17",  # todo - unify gemini/google model names
        model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},  # this works locally even if there is a model provider mismatch
        verbose=3,
    )

    # Initialize async client
    stagehand = Stagehand(
        env=os.getenv("STAGEHAND_ENV"),
        config=config,
        api_url=os.getenv("STAGEHAND_SERVER_URL"),
    )

    try:
        # Initialize the client
        await stagehand.init()
        console.print("[success]✓ Successfully initialized Stagehand async client[/]")
        console.print(f"[info]Environment: {_markup_safe(stagehand.env)} [/]")
        console.print(f"[info]LLM Client Available: {stagehand.llm is not None} [/]")

        # Navigate to AIgrant (as in the original test)
        await stagehand.page.goto("https://www.aigrant.com")
        console.print("[success]✓ Navigated to AIgrant[/]")
        await asyncio.sleep(2)

        # Get accessibility tree
        tree = await get_accessibility_tree(stagehand.page, stagehand.logger)
        console.print("[success]✓ Extracted accessibility tree[/]")
        _write_tree_snapshot(tree)

        print("ID to URL mapping:", tree.get("idToUrl"))
        print("IFrames:", tree.get("iframes"))

        # Click the "Get Started" button
        await stagehand.page.act("click the button with text 'Get Started'")
        console.print("[success]✓ Clicked 'Get Started' button[/]")

        # Observe the button
        await stagehand.page.observe("the button with text 'Get Started'")
        console.print("[success]✓ Observed 'Get Started' button[/]")

        # Extract companies using schema
        extract_options = ExtractOptions(
            instruction="Extract the names and URLs of up to 5 companies mentioned on this page",
            schema_definition=Companies,
        )
        extract_result = await stagehand.page.extract(extract_options)
        console.print("[success]✓ Extracted companies data[/]")

        # Display results
        print("Extract result:", extract_result)
        print("Extract result data:", getattr(extract_result, "data", "No data field"))

        # Parse the result into the Companies model (result shape differs
        # between LOCAL and BROWSERBASE).
        companies_data = _parse_companies(extract_result, tree.get("idToUrl") or {})

        print("\n Extracted Companies:")
        if companies_data is not None and companies_data.companies:
            for idx, company in enumerate(companies_data.companies, 1):
                print(f"{idx} . {company.name} : {company.url} ")
        else:
            print("No companies were found in the extraction result")

        # XPath click
        await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click()
        await stagehand.page.wait_for_load_state("networkidle")
        console.print("[success]✓ Clicked element using XPath[/]")

        # Open a new page with Google
        console.print("\n [info]Creating a new page...[/]")
        new_page = await stagehand.context.new_page()
        await new_page.goto("https://www.google.com")
        console.print("[success]✓ Opened Google in a new page[/]")

        # Get accessibility tree for the new page (overwrites the earlier dump)
        tree = await get_accessibility_tree(new_page, stagehand.logger)
        _write_tree_snapshot(tree)
        console.print("[success]✓ Extracted accessibility tree for new page[/]")

        # Try clicking Get Started button on Google
        await new_page.act("click the button with text 'Get Started'")

        # Only use LLM directly if in LOCAL mode
        if stagehand.llm is not None:
            await _fill_search_with_llm(stagehand, new_page, tree)
        else:
            await _fill_search_with_act(new_page)

        # Final test summary
        console.print("\n [success]All async tests completed successfully![/]")

    except Exception as e:
        # Escape the message so a bracketed fragment in it cannot break the
        # markup and hide the real failure.
        console.print(f"[error]Error during testing: {_markup_safe(e)} [/]")
        import traceback

        traceback.print_exc()
        raise
    finally:
        # Close the client
        # wait for 5 seconds
        await asyncio.sleep(5)
        await stagehand.close()
        console.print("[info]Stagehand async client closed[/]")
243
+
244
# Script entry point: start the event loop and run the scenario only when the
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
0 commit comments