Skip to content

Commit f0d15cf

Browse files
update
1 parent 76c5a1c commit f0d15cf

File tree

3 files changed

+249
-56
lines changed

3 files changed

+249
-56
lines changed

README.md

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -140,60 +140,6 @@ You can also make a copy of `.env.example` and add these to your `.env` file.
140140

141141
## Quickstart
142142

143-
Stagehand supports both synchronous and asynchronous usage. Here are examples for both approaches:
144-
145-
### Sync Client
146-
147-
```python
148-
import os
149-
from stagehand.sync import Stagehand
150-
from stagehand import StagehandConfig
151-
from dotenv import load_dotenv
152-
153-
load_dotenv()
154-
155-
def main():
156-
# Configure Stagehand
157-
config = StagehandConfig(
158-
env="BROWSERBASE",
159-
api_key=os.getenv("BROWSERBASE_API_KEY"),
160-
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
161-
model_name="gpt-4o",
162-
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}
163-
)
164-
165-
# Initialize Stagehand
166-
stagehand = Stagehand(config=config, api_url=os.getenv("STAGEHAND_API_URL"))
167-
stagehand.init()
168-
print(f"Session created: {stagehand.session_id}")
169-
170-
# Navigate to a page
171-
stagehand.page.goto("https://google.com/")
172-
173-
# Use Stagehand AI primitives
174-
stagehand.page.act("search for openai")
175-
176-
# Combine with Playwright
177-
stagehand.page.keyboard.press("Enter")
178-
179-
# Observe elements on the page
180-
observed = stagehand.page.observe("find the news button")
181-
if observed:
182-
stagehand.page.act(observed[0]) # Act on the first observed element
183-
184-
# Extract data from the page
185-
data = stagehand.page.extract("extract the first result from the search")
186-
print(f"Extracted data: {data}")
187-
188-
# Close the session
189-
stagehand.close()
190-
191-
if __name__ == "__main__":
192-
main()
193-
```
194-
195-
### Async Client
196-
197143
```python
198144
import os
199145
import asyncio

examples/example.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ async def main():
6363
verbose=2,
6464
)
6565

66-
stagehand = Stagehand(config)
66+
stagehand = Stagehand(config,
67+
api_url=os.getenv("STAGEHAND_SERVER_URL"),
68+
env=os.getenv("STAGEHAND_ENV"))
6769

6870
# Initialize - this creates a new session automatically.
6971
console.print("\n🚀 [info]Initializing Stagehand...[/]")
@@ -114,7 +116,7 @@ async def main():
114116
console.print("\n▶️ [highlight] Extracting[/] first search result")
115117
data = await page.extract("extract the first result from the search")
116118
console.print("📊 [info]Extracted data:[/]")
117-
console.print_json(f"{data.model_dump_json()}")
119+
console.print_json(json.dumps(data))
118120

119121
# Close the session
120122
console.print("\n⏹️ [warning]Closing session...[/]")

examples/second_example.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
import asyncio
2+
import logging
3+
import os
4+
from rich.console import Console
5+
from rich.panel import Panel
6+
from rich.theme import Theme
7+
from pydantic import BaseModel, Field, HttpUrl
8+
from dotenv import load_dotenv
9+
import time
10+
11+
from stagehand import StagehandConfig, Stagehand
12+
from stagehand.utils import configure_logging
13+
from stagehand.schemas import ObserveOptions, ActOptions, ExtractOptions
14+
from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id
15+
16+
# Load environment variables
17+
load_dotenv()
18+
19+
# Configure Rich console
20+
console = Console(theme=Theme({
21+
"info": "cyan",
22+
"success": "green",
23+
"warning": "yellow",
24+
"error": "red bold",
25+
"highlight": "magenta",
26+
"url": "blue underline",
27+
}))
28+
29+
# Define Pydantic models for testing
30+
class Company(BaseModel):
31+
name: str = Field(..., description="The name of the company")
32+
url: HttpUrl = Field(..., description="The URL of the company website or relevant page")
33+
34+
class Companies(BaseModel):
35+
companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies")
36+
37+
class ElementAction(BaseModel):
38+
action: str
39+
id: int
40+
arguments: list[str]
41+
42+
async def main():
43+
# Display header
44+
console.print(
45+
"\n",
46+
Panel.fit(
47+
"[light_gray]New Stagehand 🤘 Python Async Test[/]",
48+
border_style="green",
49+
padding=(1, 10),
50+
),
51+
)
52+
53+
# Create configuration
54+
config = StagehandConfig(
55+
api_key=os.getenv("BROWSERBASE_API_KEY"),
56+
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
57+
model_name="google/gemini-2.5-flash-preview-04-17", # TODO: unify the "google/" and "gemini/" model-name prefixes (the direct LLM call further down uses "gemini/")
58+
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, # this works locally even if there is a model provider mismatch
59+
verbose=3,
60+
)
61+
62+
# Initialize async client
63+
stagehand = Stagehand(
64+
env=os.getenv("STAGEHAND_ENV"),
65+
config=config,
66+
api_url=os.getenv("STAGEHAND_SERVER_URL"),
67+
)
68+
69+
try:
70+
# Initialize the client
71+
await stagehand.init()
72+
console.print("[success]✓ Successfully initialized Stagehand async client[/]")
73+
console.print(f"[info]Environment: {stagehand.env}[/]")
74+
console.print(f"[info]LLM Client Available: {stagehand.llm is not None}[/]")
75+
76+
# Navigate to AIgrant (as in the original test)
77+
await stagehand.page.goto("https://www.aigrant.com")
78+
console.print("[success]✓ Navigated to AIgrant[/]")
79+
await asyncio.sleep(2)
80+
81+
# Get accessibility tree
82+
tree = await get_accessibility_tree(stagehand.page, stagehand.logger)
83+
console.print("[success]✓ Extracted accessibility tree[/]")
84+
with open("../tree.txt", "w") as f:
85+
f.write(tree.get("simplified"))
86+
87+
print("ID to URL mapping:", tree.get("idToUrl"))
88+
print("IFrames:", tree.get("iframes"))
89+
90+
# Click the "Get Started" button
91+
await stagehand.page.act("click the button with text 'Get Started'")
92+
console.print("[success]✓ Clicked 'Get Started' button[/]")
93+
94+
# Observe the button
95+
await stagehand.page.observe("the button with text 'Get Started'")
96+
console.print("[success]✓ Observed 'Get Started' button[/]")
97+
98+
# Extract companies using schema
99+
extract_options = ExtractOptions(
100+
instruction="Extract the names and URLs of up to 5 companies mentioned on this page",
101+
schema_definition=Companies
102+
)
103+
104+
extract_result = await stagehand.page.extract(extract_options)
105+
console.print("[success]✓ Extracted companies data[/]")
106+
107+
# Display results
108+
print("Extract result:", extract_result)
109+
print("Extract result data:", extract_result.data if hasattr(extract_result, 'data') else 'No data field')
110+
111+
# Parse the result into the Companies model
112+
companies_data = None
113+
114+
# Handle different result formats between LOCAL and BROWSERBASE
115+
if hasattr(extract_result, 'data') and extract_result.data:
116+
# BROWSERBASE mode - data is in the 'data' field
117+
try:
118+
raw_data = extract_result.data
119+
console.print(f"[info]Raw extract data: {raw_data}[/]")
120+
121+
# Check if the data needs URL resolution from ID mapping
122+
if isinstance(raw_data, dict) and 'companies' in raw_data:
123+
id_to_url = tree.get("idToUrl", {})
124+
for company in raw_data['companies']:
125+
if 'url' in company and isinstance(company['url'], str):
126+
# Check if URL is just an ID that needs to be resolved
127+
if company['url'].isdigit() and company['url'] in id_to_url:
128+
company['url'] = id_to_url[company['url']]
129+
console.print(f"[success]✓ Resolved URL for {company['name']}: {company['url']}[/]")
130+
131+
companies_data = Companies.model_validate(raw_data)
132+
console.print("[success]✓ Successfully parsed extract result into Companies model[/]")
133+
except Exception as e:
134+
console.print(f"[error]Failed to parse extract result: {e}[/]")
135+
print("Raw data:", extract_result.data)
136+
elif hasattr(extract_result, 'companies'):
137+
# LOCAL mode - companies field is directly available
138+
try:
139+
companies_data = Companies.model_validate(extract_result.model_dump())
140+
console.print("[success]✓ Successfully parsed extract result into Companies model[/]")
141+
except Exception as e:
142+
console.print(f"[error]Failed to parse extract result: {e}[/]")
143+
print("Raw companies data:", extract_result.companies)
144+
145+
print("\nExtracted Companies:")
146+
if companies_data and hasattr(companies_data, "companies"):
147+
for idx, company in enumerate(companies_data.companies, 1):
148+
print(f"{idx}. {company.name}: {company.url}")
149+
else:
150+
print("No companies were found in the extraction result")
151+
152+
# XPath click
153+
await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click()
154+
await stagehand.page.wait_for_load_state('networkidle')
155+
console.print("[success]✓ Clicked element using XPath[/]")
156+
157+
# Open a new page with Google
158+
console.print("\n[info]Creating a new page...[/]")
159+
new_page = await stagehand.context.new_page()
160+
await new_page.goto("https://www.google.com")
161+
console.print("[success]✓ Opened Google in a new page[/]")
162+
163+
# Get accessibility tree for the new page
164+
tree = await get_accessibility_tree(new_page, stagehand.logger)
165+
with open("../tree.txt", "w") as f:
166+
f.write(tree.get("simplified"))
167+
console.print("[success]✓ Extracted accessibility tree for new page[/]")
168+
169+
# Try clicking Get Started button on Google
170+
await new_page.act("click the button with text 'Get Started'")
171+
172+
# Call the LLM directly only when a client is available (stagehand.llm is expected to be None outside LOCAL mode, e.g. BROWSERBASE)
173+
if stagehand.llm is not None:
174+
console.print("[info]LLM client available - using direct LLM call[/]")
175+
176+
# Use LLM to analyze the page
177+
response = stagehand.llm.create_response(
178+
messages=[
179+
{
180+
"role": "system",
181+
"content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions."
182+
},
183+
{
184+
"role": "user",
185+
"content": [
186+
{
187+
"type": "text",
188+
"text": f"fill the search bar with the text 'Hello'\nPage Tree:\n{tree.get('simplified')}"
189+
}
190+
]
191+
}
192+
],
193+
model="gemini/gemini-2.5-flash-preview-04-17",
194+
response_format=ElementAction,
195+
)
196+
197+
action = ElementAction.model_validate_json(response.choices[0].message.content)
198+
console.print(f"[success]✓ LLM identified element ID: {action.id}[/]")
199+
200+
# Test CDP functionality
201+
args = {"backendNodeId": action.id}
202+
result = await new_page.send_cdp("DOM.resolveNode", args)
203+
object_info = result.get("object")
204+
print(object_info)
205+
206+
xpath = await get_xpath_by_resolved_object_id(await new_page.get_cdp_client(), object_info["objectId"])
207+
console.print(f"[success]✓ Retrieved XPath: {xpath}[/]")
208+
209+
# Interact with the element
210+
if xpath:
211+
await new_page.locator(f"xpath={xpath}").click()
212+
await new_page.locator(f"xpath={xpath}").fill(action.arguments[0])
213+
console.print("[success]✓ Filled search bar with 'Hello'[/]")
214+
else:
215+
print("No xpath found")
216+
else:
217+
console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]")
218+
# Alternative: use page.observe to find the search bar
219+
observe_result = await new_page.observe("the search bar or search input field")
220+
console.print(f"[info]Observed search elements: {observe_result}[/]")
221+
222+
# Use page.act to fill the search bar
223+
try:
224+
await new_page.act("fill the search bar with 'Hello'")
225+
console.print("[success]✓ Filled search bar using act()[/]")
226+
except Exception as e:
227+
console.print(f"[warning]Could not fill search bar: {e}[/]")
228+
229+
# Final test summary
230+
console.print("\n[success]All async tests completed successfully![/]")
231+
232+
except Exception as e:
233+
console.print(f"[error]Error during testing: {str(e)}[/]")
234+
import traceback
235+
traceback.print_exc()
236+
raise
237+
finally:
238+
# Close the client
239+
# Wait 5 seconds first; the close call runs after the sleep
240+
await asyncio.sleep(5)
241+
await stagehand.close()
242+
console.print("[info]Stagehand async client closed[/]")
243+
244+
if __name__ == "__main__":
245+
asyncio.run(main())

0 commit comments

Comments
 (0)