From 802b3875e3ebe8b6469839e4d5ce88d37bf3b8c8 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Mon, 2 Jun 2025 09:07:19 -0400 Subject: [PATCH 01/11] update readme --- README.md | 70 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index edb3b90d..6c616ab5 100644 --- a/README.md +++ b/README.md @@ -62,40 +62,67 @@ await stagehand.agent.execute("book a reservation for 2 people for a trip to the ## Installation -Install the Python package via pip: +**Recommended:** Install using `uv` (fast Python package manager): + +```bash +uv add stagehand +``` + +Alternatively, install via pip: ```bash pip install stagehand ``` + +### Installing with uv + +[uv](https://github.com/astral-sh/uv) is a fast Python package installer and resolver. If you don't have uv installed, you can install it with: + +```bash +# On macOS and Linux +curl -LsSf https://astral.sh/uv/install.sh | sh + +# On Windows +powershell -c "irm https://astral.sh/uv/install.ps1 | iex" + +# Or via pip +pip install uv +``` + +For new projects, you can create a new project with uv: + +```bash +uv init stagehand-project +cd stagehand-project +uv add stagehand +``` + ## Requirements - Python 3.9+ -- httpx (for async client) -- requests (for sync client) -- asyncio (for async client) -- pydantic -- python-dotenv (optional, for .env support) -- playwright -- rich (for `examples/` terminal support) +- All dependencies are automatically handled when installing via `uv` or `pip` -You can simply run: +The main dependencies include: +- httpx (for async HTTP client) +- requests (for sync HTTP client) +- pydantic (for data validation) +- playwright (for browser automation) +- python-dotenv (for environment variable support) +- browserbase (for Browserbase integration) + +### Development Dependencies + +For development, install with dev dependencies: ```bash -pip install -r requirements.txt +uv add stagehand --dev ``` 
-**requirements.txt** -```txt -httpx>=0.24.0 -asyncio>=3.4.3 -python-dotenv>=1.0.0 -pydantic>=1.10.0 -playwright>=1.42.1 -requests>=2.31.0 -rich -browserbase -``` +Or install dev dependencies separately: +```bash +uv add --dev pytest pytest-asyncio pytest-mock pytest-cov black isort mypy ruff rich +``` ## Environment Variables @@ -106,6 +133,7 @@ export BROWSERBASE_API_KEY="your-api-key" export BROWSERBASE_PROJECT_ID="your-project-id" export MODEL_API_KEY="your-openai-api-key" # or your preferred model's API key export STAGEHAND_API_URL="url-of-stagehand-server" +export STAGEHAND_ENV="BROWSERBASE" # or "LOCAL" to run Stagehand locally ``` You can also make a copy of `.env.example` and add these to your `.env` file. From 76c5a1c1742b8d5c5e792d838773da886498243e Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Mon, 2 Jun 2025 09:46:20 -0400 Subject: [PATCH 02/11] update env example --- .env.example => .env.example | 1 + .gitignore | 1 + 2 files changed, 2 insertions(+) rename .env.example => .env.example (82%) diff --git a/ .env.example b/.env.example similarity index 82% rename from .env.example rename to .env.example index fc61ab3c..074f8459 100644 --- a/ .env.example +++ b/.env.example @@ -2,3 +2,4 @@ MODEL_API_KEY = "anthropic-or-openai-api-key" BROWSERBASE_API_KEY = "browserbase-api-key" BROWSERBASE_PROJECT_ID = "browserbase-project-id" STAGEHAND_API_URL = "api_url" +STAGEHAND_ENV= "LOCAL or BROWSERBASE" \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1ca635a6..4a024830 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ yarn-error.log* # env files (can opt-in for committing if needed) .env* +!.env.example # vercel .vercel From f0d15cfa3ebece929ae249f20f17add602372cc9 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Mon, 2 Jun 2025 10:02:27 -0400 Subject: [PATCH 03/11] update --- README.md | 54 -------- examples/example.py | 6 +- examples/second_example.py | 245 +++++++++++++++++++++++++++++++++++++ 3 files changed, 249 
insertions(+), 56 deletions(-) create mode 100644 examples/second_example.py diff --git a/README.md b/README.md index 6c616ab5..8b09c556 100644 --- a/README.md +++ b/README.md @@ -140,60 +140,6 @@ You can also make a copy of `.env.example` and add these to your `.env` file. ## Quickstart -Stagehand supports both synchronous and asynchronous usage. Here are examples for both approaches: - -### Sync Client - -```python -import os -from stagehand.sync import Stagehand -from stagehand import StagehandConfig -from dotenv import load_dotenv - -load_dotenv() - -def main(): - # Configure Stagehand - config = StagehandConfig( - env="BROWSERBASE", - api_key=os.getenv("BROWSERBASE_API_KEY"), - project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - model_name="gpt-4o", - model_client_options={"apiKey": os.getenv("MODEL_API_KEY")} - ) - - # Initialize Stagehand - stagehand = Stagehand(config=config, api_url=os.getenv("STAGEHAND_API_URL")) - stagehand.init() - print(f"Session created: {stagehand.session_id}") - - # Navigate to a page - stagehand.page.goto("https://google.com/") - - # Use Stagehand AI primitives - stagehand.page.act("search for openai") - - # Combine with Playwright - stagehand.page.keyboard.press("Enter") - - # Observe elements on the page - observed = stagehand.page.observe("find the news button") - if observed: - stagehand.page.act(observed[0]) # Act on the first observed element - - # Extract data from the page - data = stagehand.page.extract("extract the first result from the search") - print(f"Extracted data: {data}") - - # Close the session - stagehand.close() - -if __name__ == "__main__": - main() -``` - -### Async Client - ```python import os import asyncio diff --git a/examples/example.py b/examples/example.py index 78219966..a886a30a 100644 --- a/examples/example.py +++ b/examples/example.py @@ -63,7 +63,9 @@ async def main(): verbose=2, ) - stagehand = Stagehand(config) + stagehand = Stagehand(config, + api_url=os.getenv("STAGEHAND_SERVER_URL"), + 
env=os.getenv("STAGEHAND_ENV")) # Initialize - this creates a new session automatically. console.print("\nšŸš€ [info]Initializing Stagehand...[/]") @@ -114,7 +116,7 @@ async def main(): console.print("\nā–¶ļø [highlight] Extracting[/] first search result") data = await page.extract("extract the first result from the search") console.print("šŸ“Š [info]Extracted data:[/]") - console.print_json(f"{data.model_dump_json()}") + console.print_json(json.dumps(data)) # Close the session console.print("\nā¹ļø [warning]Closing session...[/]") diff --git a/examples/second_example.py b/examples/second_example.py new file mode 100644 index 00000000..6fa76656 --- /dev/null +++ b/examples/second_example.py @@ -0,0 +1,245 @@ +import asyncio +import logging +import os +from rich.console import Console +from rich.panel import Panel +from rich.theme import Theme +from pydantic import BaseModel, Field, HttpUrl +from dotenv import load_dotenv +import time + +from stagehand import StagehandConfig, Stagehand +from stagehand.utils import configure_logging +from stagehand.schemas import ObserveOptions, ActOptions, ExtractOptions +from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id + +# Load environment variables +load_dotenv() + +# Configure Rich console +console = Console(theme=Theme({ + "info": "cyan", + "success": "green", + "warning": "yellow", + "error": "red bold", + "highlight": "magenta", + "url": "blue underline", +})) + +# Define Pydantic models for testing +class Company(BaseModel): + name: str = Field(..., description="The name of the company") + url: HttpUrl = Field(..., description="The URL of the company website or relevant page") + +class Companies(BaseModel): + companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies") + +class ElementAction(BaseModel): + action: str + id: int + arguments: list[str] + +async def main(): + # Display header + console.print( + "\n", + Panel.fit( 
+ "[light_gray]New Stagehand 🤘 Python Async Test[/]", + border_style="green", + padding=(1, 10), + ), + ) + + # Create configuration + config = StagehandConfig( + api_key=os.getenv("BROWSERBASE_API_KEY"), + project_id=os.getenv("BROWSERBASE_PROJECT_ID"), + model_name="google/gemini-2.5-flash-preview-04-17", # todo - unify gemini/google model names + model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, # this works locally even if there is a model provider mismatch + verbose=3, + ) + + # Initialize async client + stagehand = Stagehand( + env=os.getenv("STAGEHAND_ENV"), + config=config, + api_url=os.getenv("STAGEHAND_SERVER_URL"), + ) + + try: + # Initialize the client + await stagehand.init() + console.print("[success]āœ“ Successfully initialized Stagehand async client[/]") + console.print(f"[info]Environment: {stagehand.env}[/]") + console.print(f"[info]LLM Client Available: {stagehand.llm is not None}[/]") + + # Navigate to AIgrant (as in the original test) + await stagehand.page.goto("https://www.aigrant.com") + console.print("[success]āœ“ Navigated to AIgrant[/]") + await asyncio.sleep(2) + + # Get accessibility tree + tree = await get_accessibility_tree(stagehand.page, stagehand.logger) + console.print("[success]āœ“ Extracted accessibility tree[/]") + with open("../tree.txt", "w") as f: + f.write(tree.get("simplified")) + + print("ID to URL mapping:", tree.get("idToUrl")) + print("IFrames:", tree.get("iframes")) + + # Click the "Get Started" button + await stagehand.page.act("click the button with text 'Get Started'") + console.print("[success]āœ“ Clicked 'Get Started' button[/]") + + # Observe the button + await stagehand.page.observe("the button with text 'Get Started'") + console.print("[success]āœ“ Observed 'Get Started' button[/]") + + # Extract companies using schema + extract_options = ExtractOptions( + instruction="Extract the names and URLs of up to 5 companies mentioned on this page", + schema_definition=Companies + ) + + extract_result = 
await stagehand.page.extract(extract_options) + console.print("[success]āœ“ Extracted companies data[/]") + + # Display results + print("Extract result:", extract_result) + print("Extract result data:", extract_result.data if hasattr(extract_result, 'data') else 'No data field') + + # Parse the result into the Companies model + companies_data = None + + # Handle different result formats between LOCAL and BROWSERBASE + if hasattr(extract_result, 'data') and extract_result.data: + # BROWSERBASE mode - data is in the 'data' field + try: + raw_data = extract_result.data + console.print(f"[info]Raw extract data: {raw_data}[/]") + + # Check if the data needs URL resolution from ID mapping + if isinstance(raw_data, dict) and 'companies' in raw_data: + id_to_url = tree.get("idToUrl", {}) + for company in raw_data['companies']: + if 'url' in company and isinstance(company['url'], str): + # Check if URL is just an ID that needs to be resolved + if company['url'].isdigit() and company['url'] in id_to_url: + company['url'] = id_to_url[company['url']] + console.print(f"[success]āœ“ Resolved URL for {company['name']}: {company['url']}[/]") + + companies_data = Companies.model_validate(raw_data) + console.print("[success]āœ“ Successfully parsed extract result into Companies model[/]") + except Exception as e: + console.print(f"[error]Failed to parse extract result: {e}[/]") + print("Raw data:", extract_result.data) + elif hasattr(extract_result, 'companies'): + # LOCAL mode - companies field is directly available + try: + companies_data = Companies.model_validate(extract_result.model_dump()) + console.print("[success]āœ“ Successfully parsed extract result into Companies model[/]") + except Exception as e: + console.print(f"[error]Failed to parse extract result: {e}[/]") + print("Raw companies data:", extract_result.companies) + + print("\nExtracted Companies:") + if companies_data and hasattr(companies_data, "companies"): + for idx, company in enumerate(companies_data.companies, 
1): + print(f"{idx}. {company.name}: {company.url}") + else: + print("No companies were found in the extraction result") + + # XPath click + await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click() + await stagehand.page.wait_for_load_state('networkidle') + console.print("[success]āœ“ Clicked element using XPath[/]") + + # Open a new page with Google + console.print("\n[info]Creating a new page...[/]") + new_page = await stagehand.context.new_page() + await new_page.goto("https://www.google.com") + console.print("[success]āœ“ Opened Google in a new page[/]") + + # Get accessibility tree for the new page + tree = await get_accessibility_tree(new_page, stagehand.logger) + with open("../tree.txt", "w") as f: + f.write(tree.get("simplified")) + console.print("[success]āœ“ Extracted accessibility tree for new page[/]") + + # Try clicking Get Started button on Google + await new_page.act("click the button with text 'Get Started'") + + # Only use LLM directly if in LOCAL mode + if stagehand.llm is not None: + console.print("[info]LLM client available - using direct LLM call[/]") + + # Use LLM to analyze the page + response = stagehand.llm.create_response( + messages=[ + { + "role": "system", + "content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions." 
+ }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": f"fill the search bar with the text 'Hello'\nPage Tree:\n{tree.get('simplified')}" + } + ] + } + ], + model="gemini/gemini-2.5-flash-preview-04-17", + response_format=ElementAction, + ) + + action = ElementAction.model_validate_json(response.choices[0].message.content) + console.print(f"[success]āœ“ LLM identified element ID: {action.id}[/]") + + # Test CDP functionality + args = {"backendNodeId": action.id} + result = await new_page.send_cdp("DOM.resolveNode", args) + object_info = result.get("object") + print(object_info) + + xpath = await get_xpath_by_resolved_object_id(await new_page.get_cdp_client(), object_info["objectId"]) + console.print(f"[success]āœ“ Retrieved XPath: {xpath}[/]") + + # Interact with the element + if xpath: + await new_page.locator(f"xpath={xpath}").click() + await new_page.locator(f"xpath={xpath}").fill(action.arguments[0]) + console.print("[success]āœ“ Filled search bar with 'Hello'[/]") + else: + print("No xpath found") + else: + console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]") + # Alternative: use page.observe to find the search bar + observe_result = await new_page.observe("the search bar or search input field") + console.print(f"[info]Observed search elements: {observe_result}[/]") + + # Use page.act to fill the search bar + try: + await new_page.act("fill the search bar with 'Hello'") + console.print("[success]āœ“ Filled search bar using act()[/]") + except Exception as e: + console.print(f"[warning]Could not fill search bar: {e}[/]") + + # Final test summary + console.print("\n[success]All async tests completed successfully![/]") + + except Exception as e: + console.print(f"[error]Error during testing: {str(e)}[/]") + import traceback + traceback.print_exc() + raise + finally: + # Close the client + # wait for 5 seconds + await asyncio.sleep(5) + await stagehand.close() + console.print("[info]Stagehand async 
client closed[/]") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From 9e82aba411fcc77cf1d761cbade619d4b8ff5384 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Mon, 2 Jun 2025 22:56:08 -0400 Subject: [PATCH 04/11] update to pip --- README.md | 59 ++++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 8b09c556..4fa3f15a 100644 --- a/README.md +++ b/README.md @@ -62,45 +62,44 @@ await stagehand.agent.execute("book a reservation for 2 people for a trip to the ## Installation -**Recommended:** Install using `uv` (fast Python package manager): +### Creating a Virtual Environment (Recommended) + +First, create and activate a virtual environment to keep your project dependencies isolated: ```bash -uv add stagehand +# Create a virtual environment +python -m venv stagehand-env + +# Activate the environment +# On macOS/Linux: +source stagehand-env/bin/activate +# On Windows: +stagehand-env\Scripts\activate ``` -Alternatively, install via pip: +### Install Stagehand +**Normal Installation:** ```bash pip install stagehand ``` -### Installing with uv - -[uv](https://github.com/astral-sh/uv) is a fast Python package installer and resolver. 
If you don't have uv installed, you can install it with: +**Local Development Installation:** +If you're contributing to Stagehand or want to modify the source code: ```bash -# On macOS and Linux -curl -LsSf https://astral.sh/uv/install.sh | sh +# Clone the repository +git clone https://github.com/browserbase/stagehand-python.git +cd stagehand-python -# On Windows -powershell -c "irm https://astral.sh/uv/install.ps1 | iex" - -# Or via pip -pip install uv -``` - -For new projects, you can create a new project with uv: - -```bash -uv init stagehand-project -cd stagehand-project -uv add stagehand +# Install in editable mode with development dependencies +pip install -e .[dev] ``` ## Requirements - Python 3.9+ -- All dependencies are automatically handled when installing via `uv` or `pip` +- All dependencies are automatically handled when installing via `pip` The main dependencies include: - httpx (for async HTTP client) @@ -112,17 +111,11 @@ The main dependencies include: ### Development Dependencies -For development, install with dev dependencies: - -```bash -uv add stagehand --dev -``` - -Or install dev dependencies separately: - -```bash -uv add --dev pytest pytest-asyncio pytest-mock pytest-cov black isort mypy ruff rich -``` +The development dependencies are automatically installed when using `pip install -e .[dev]` and include: +- pytest, pytest-asyncio, pytest-mock, pytest-cov (testing) +- black, isort, ruff (code formatting and linting) +- mypy (type checking) +- rich (enhanced terminal output) ## Environment Variables From 89164ee7d47ff59d12ceadbe7f2d053509af661d Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 3 Jun 2025 08:07:07 -0400 Subject: [PATCH 05/11] update examples and README --- .env.example | 2 +- README.md | 16 +- examples/example.py | 340 ++++++++++++++++++++++++------------- examples/second_example.py | 339 +++++++++++++----------------------- pyproject.toml | 3 + 5 files changed, 355 insertions(+), 345 deletions(-) diff --git 
a/.env.example b/.env.example index 074f8459..45f5ae1b 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,4 @@ -MODEL_API_KEY = "anthropic-or-openai-api-key" +MODEL_API_KEY = "your-favorite-llm-api-key" BROWSERBASE_API_KEY = "browserbase-api-key" BROWSERBASE_PROJECT_ID = "browserbase-project-id" STAGEHAND_API_URL = "api_url" diff --git a/README.md b/README.md index 4fa3f15a..83685e99 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,11 @@ git clone https://github.com/browserbase/stagehand-python.git cd stagehand-python # Install in editable mode with development dependencies -pip install -e .[dev] +pip install -e ".[dev]" + +### INSTRUCTION TO BE REMOVED BEFORE RELEASE +# install google cua +pip install temp/path-to-the-cua-wheel.wheel ``` ## Requirements @@ -111,7 +115,7 @@ The main dependencies include: ### Development Dependencies -The development dependencies are automatically installed when using `pip install -e .[dev]` and include: +The development dependencies are automatically installed when using `pip install -e ".[dev]"` and include: - pytest, pytest-asyncio, pytest-mock, pytest-cov (testing) - black, isort, ruff (code formatting and linting) - mypy (type checking) @@ -119,13 +123,13 @@ The development dependencies are automatically installed when using `pip install ## Environment Variables -Before running your script, set the following environment variables: +Before running your script, copy `.env.example` to `.env` and set the following environment variables: ```bash -export BROWSERBASE_API_KEY="your-api-key" -export BROWSERBASE_PROJECT_ID="your-project-id" +export BROWSERBASE_API_KEY="your-api-key" # if running remotely +export BROWSERBASE_PROJECT_ID="your-project-id" # if running remotely export MODEL_API_KEY="your-openai-api-key" # or your preferred model's API key -export STAGEHAND_API_URL="url-of-stagehand-server" +export STAGEHAND_API_URL="url-of-stagehand-server" # if running remotely export STAGEHAND_ENV="BROWSERBASE" # or "LOCAL" to run 
Stagehand locally ``` diff --git a/examples/example.py b/examples/example.py index a886a30a..c145c386 100644 --- a/examples/example.py +++ b/examples/example.py @@ -4,135 +4,245 @@ from rich.console import Console from rich.panel import Panel from rich.theme import Theme -import json +from pydantic import BaseModel, Field, HttpUrl from dotenv import load_dotenv +import time -from stagehand import Stagehand, StagehandConfig +from stagehand import StagehandConfig, Stagehand from stagehand.utils import configure_logging +from stagehand.schemas import ObserveOptions, ActOptions, ExtractOptions +from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id -# Configure logging with cleaner format -configure_logging( - level=logging.INFO, - remove_logger_name=True, # Remove the redundant stagehand.client prefix - quiet_dependencies=True, # Suppress httpx and other noisy logs -) +# Load environment variables +load_dotenv() -# Create a custom theme for consistent styling -custom_theme = Theme( - { - "info": "cyan", - "success": "green", - "warning": "yellow", - "error": "red bold", - "highlight": "magenta", - "url": "blue underline", - } -) +# Configure Rich console +console = Console(theme=Theme({ + "info": "cyan", + "success": "green", + "warning": "yellow", + "error": "red bold", + "highlight": "magenta", + "url": "blue underline", +})) + +# Define Pydantic models for testing +class Company(BaseModel): + name: str = Field(..., description="The name of the company") + # todo - URL needs to be pydantic type HttpUrl otherwise it does not extract the URL + url: HttpUrl = Field(..., description="The URL of the company website or relevant page") + +class Companies(BaseModel): + companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies") -# Create a Rich console instance with our theme -console = Console(theme=custom_theme) +class ElementAction(BaseModel): + action: str + id: int + arguments: 
list[str] -load_dotenv() - -console.print( - Panel.fit( - "[yellow]Logging Levels:[/]\n" - "[white]- Set [bold]verbose=0[/] for errors (ERROR)[/]\n" - "[white]- Set [bold]verbose=1[/] for minimal logs (INFO)[/]\n" - "[white]- Set [bold]verbose=2[/] for medium logs (WARNING)[/]\n" - "[white]- Set [bold]verbose=3[/] for detailed logs (DEBUG)[/]", - title="Verbosity Options", - border_style="blue", +async def main(): + # Display header + console.print( + "\n", + Panel.fit( + "[light_gray]New Stagehand 🤘 Python Async Test[/]", + border_style="green", + padding=(1, 10), + ), ) -) -async def main(): - # Build a unified configuration object for Stagehand + # Create configuration + model_name = "google/gemini-2.5-flash-preview-04-17" + config = StagehandConfig( - env="BROWSERBASE", api_key=os.getenv("BROWSERBASE_API_KEY"), project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - headless=False, - dom_settle_timeout_ms=3000, - model_name="google/gemini-2.0-flash", - self_heal=True, - wait_for_captcha_solves=True, - system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", - model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, - # Use verbose=2 for medium-detail logs (1=minimal, 3=debug) - verbose=2, + model_name=model_name, # todo - unify gemini/google model names + model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, # this works locally even if there is a model provider mismatch + verbose=3, ) - - stagehand = Stagehand(config, - api_url=os.getenv("STAGEHAND_SERVER_URL"), - env=os.getenv("STAGEHAND_ENV")) - - # Initialize - this creates a new session automatically. 
- console.print("\nšŸš€ [info]Initializing Stagehand...[/]") - await stagehand.init() - page = stagehand.page - console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}") - console.print( - f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]" - ) - - await asyncio.sleep(2) - - console.print("\nā–¶ļø [highlight] Navigating[/] to Google") - await page.goto("https://google.com/") - console.print("āœ… [success]Navigated to Google[/]") - - console.print("\nā–¶ļø [highlight] Clicking[/] on About link") - # Click on the "About" link using Playwright - await page.get_by_role("link", name="About", exact=True).click() - console.print("āœ… [success]Clicked on About link[/]") - - await asyncio.sleep(2) - console.print("\nā–¶ļø [highlight] Navigating[/] back to Google") - await page.goto("https://google.com/") - console.print("āœ… [success]Navigated back to Google[/]") - - console.print("\nā–¶ļø [highlight] Performing action:[/] search for openai") - await page.act("search for openai") - await page.keyboard.press("Enter") - console.print("āœ… [success]Performing Action:[/] Action completed successfully") - await asyncio.sleep(2) - - console.print("\nā–¶ļø [highlight] Observing page[/] for news button") - observed = await page.observe("find all articles") + # Initialize async client + stagehand = Stagehand( + env=os.getenv("STAGEHAND_ENV"), + config=config, + api_url=os.getenv("STAGEHAND_SERVER_URL"), + ) - if len(observed) > 0: - element = observed[0] - console.print("āœ… [success]Found element:[/] News button") - console.print("\nā–¶ļø [highlight] Performing action on observed element:") - console.print(element) - await page.act(element) - console.print("āœ… [success]Performing Action:[/] Action completed successfully") - - else: - console.print("āŒ [error]No element found[/]") - - console.print("\nā–¶ļø [highlight] Extracting[/] first search result") - data = await page.extract("extract the first 
result from the search") - console.print("šŸ“Š [info]Extracted data:[/]") - console.print_json(json.dumps(data)) - - # Close the session - console.print("\nā¹ļø [warning]Closing session...[/]") - await stagehand.close() - console.print("āœ… [success]Session closed successfully![/]") - console.rule("[bold]End of Example[/]") - + try: + # Initialize the client + await stagehand.init() + console.print("[success]āœ“ Successfully initialized Stagehand async client[/]") + console.print(f"[info]Environment: {stagehand.env}[/]") + console.print(f"[info]LLM Client Available: {stagehand.llm is not None}[/]") + + # Navigate to AIgrant (as in the original test) + await stagehand.page.goto("https://www.aigrant.com") + console.print("[success]āœ“ Navigated to AIgrant[/]") + await asyncio.sleep(2) + + # Get accessibility tree + tree = await get_accessibility_tree(stagehand.page, stagehand.logger) + console.print("[success]āœ“ Extracted accessibility tree[/]") + with open("../tree.txt", "w") as f: + f.write(tree.get("simplified")) + + print("ID to URL mapping:", tree.get("idToUrl")) + print("IFrames:", tree.get("iframes")) + + # Click the "Get Started" button + await stagehand.page.act("click the button with text 'Get Started'") + console.print("[success]āœ“ Clicked 'Get Started' button[/]") + + # Observe the button + await stagehand.page.observe("the button with text 'Get Started'") + console.print("[success]āœ“ Observed 'Get Started' button[/]") + + # Extract companies using schema + extract_options = ExtractOptions( + instruction="Extract the names and URLs of up to 5 companies mentioned on this page", + schema_definition=Companies + ) + + extract_result = await stagehand.page.extract(extract_options) + console.print("[success]āœ“ Extracted companies data[/]") + + # Display results + print("Extract result:", extract_result) + print("Extract result data:", extract_result.data if hasattr(extract_result, 'data') else 'No data field') + + # Parse the result into the Companies 
model + companies_data = None + + # Handle different result formats between LOCAL and BROWSERBASE + if hasattr(extract_result, 'data') and extract_result.data: + # BROWSERBASE mode - data is in the 'data' field + try: + raw_data = extract_result.data + console.print(f"[info]Raw extract data: {raw_data}[/]") + + # Check if the data needs URL resolution from ID mapping + if isinstance(raw_data, dict) and 'companies' in raw_data: + id_to_url = tree.get("idToUrl", {}) + for company in raw_data['companies']: + if 'url' in company and isinstance(company['url'], str): + # Check if URL is just an ID that needs to be resolved + if company['url'].isdigit() and company['url'] in id_to_url: + company['url'] = id_to_url[company['url']] + console.print(f"[success]āœ“ Resolved URL for {company['name']}: {company['url']}[/]") + + companies_data = Companies.model_validate(raw_data) + console.print("[success]āœ“ Successfully parsed extract result into Companies model[/]") + except Exception as e: + console.print(f"[error]Failed to parse extract result: {e}[/]") + print("Raw data:", extract_result.data) + elif hasattr(extract_result, 'companies'): + # LOCAL mode - companies field is directly available + try: + companies_data = Companies.model_validate(extract_result.model_dump()) + console.print("[success]āœ“ Successfully parsed extract result into Companies model[/]") + except Exception as e: + console.print(f"[error]Failed to parse extract result: {e}[/]") + print("Raw companies data:", extract_result.companies) + + print("\nExtracted Companies:") + if companies_data and hasattr(companies_data, "companies"): + for idx, company in enumerate(companies_data.companies, 1): + print(f"{idx}. 
{company.name}: {company.url}") + else: + print("No companies were found in the extraction result") + + # XPath click + await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click() + await stagehand.page.wait_for_load_state('networkidle') + console.print("[success]āœ“ Clicked element using XPath[/]") + + # Open a new page with Google + console.print("\n[info]Creating a new page...[/]") + new_page = await stagehand.context.new_page() + await new_page.goto("https://www.google.com") + console.print("[success]āœ“ Opened Google in a new page[/]") + + # Get accessibility tree for the new page + tree = await get_accessibility_tree(new_page, stagehand.logger) + with open("../tree.txt", "w") as f: + f.write(tree.get("simplified")) + console.print("[success]āœ“ Extracted accessibility tree for new page[/]") + + # Try clicking Get Started button on Google + await new_page.act("click the button with text 'Get Started'") + + # Only use LLM directly if in LOCAL mode + if stagehand.llm is not None: + console.print("[info]LLM client available - using direct LLM call[/]") + + # Use LLM to analyze the page + response = stagehand.llm.create_response( + messages=[ + { + "role": "system", + "content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions." 
+ }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": f"fill the search bar with the text 'Hello'\nPage Tree:\n{tree.get('simplified')}" + } + ] + } + ], + model=model_name, + response_format=ElementAction, + ) + + action = ElementAction.model_validate_json(response.choices[0].message.content) + console.print(f"[success]āœ“ LLM identified element ID: {action.id}[/]") + + # Test CDP functionality + args = {"backendNodeId": action.id} + result = await new_page.send_cdp("DOM.resolveNode", args) + object_info = result.get("object") + print(object_info) + + xpath = await get_xpath_by_resolved_object_id(await new_page.get_cdp_client(), object_info["objectId"]) + console.print(f"[success]āœ“ Retrieved XPath: {xpath}[/]") + + # Interact with the element + if xpath: + await new_page.locator(f"xpath={xpath}").click() + await new_page.locator(f"xpath={xpath}").fill(action.arguments[0]) + console.print("[success]āœ“ Filled search bar with 'Hello'[/]") + else: + print("No xpath found") + else: + console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]") + # Alternative: use page.observe to find the search bar + observe_result = await new_page.observe("the search bar or search input field") + console.print(f"[info]Observed search elements: {observe_result}[/]") + + # Use page.act to fill the search bar + try: + await new_page.act("fill the search bar with 'Hello'") + console.print("[success]āœ“ Filled search bar using act()[/]") + except Exception as e: + console.print(f"[warning]Could not fill search bar: {e}[/]") + + # Final test summary + console.print("\n[success]All async tests completed successfully![/]") + + except Exception as e: + console.print(f"[error]Error during testing: {str(e)}[/]") + import traceback + traceback.print_exc() + raise + finally: + # Close the client + # wait for 5 seconds + await asyncio.sleep(5) + await stagehand.close() + console.print("[info]Stagehand async client closed[/]") if 
__name__ == "__main__": - # Add a fancy header - console.print( - "\n", - Panel.fit( - "[light_gray]Stagehand 🤘 Python Example[/]", - border_style="green", - padding=(1, 10), - ), - ) - asyncio.run(main()) + asyncio.run(main()) \ No newline at end of file diff --git a/examples/second_example.py b/examples/second_example.py index 6fa76656..f3b39f5f 100644 --- a/examples/second_example.py +++ b/examples/second_example.py @@ -4,242 +4,135 @@ from rich.console import Console from rich.panel import Panel from rich.theme import Theme -from pydantic import BaseModel, Field, HttpUrl +import json from dotenv import load_dotenv -import time -from stagehand import StagehandConfig, Stagehand +from stagehand import Stagehand, StagehandConfig from stagehand.utils import configure_logging -from stagehand.schemas import ObserveOptions, ActOptions, ExtractOptions -from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id -# Load environment variables -load_dotenv() +# Configure logging with cleaner format +configure_logging( + level=logging.INFO, + remove_logger_name=True, # Remove the redundant stagehand.client prefix + quiet_dependencies=True, # Suppress httpx and other noisy logs +) -# Configure Rich console -console = Console(theme=Theme({ - "info": "cyan", - "success": "green", - "warning": "yellow", - "error": "red bold", - "highlight": "magenta", - "url": "blue underline", -})) - -# Define Pydantic models for testing -class Company(BaseModel): - name: str = Field(..., description="The name of the company") - url: HttpUrl = Field(..., description="The URL of the company website or relevant page") - -class Companies(BaseModel): - companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies") +# Create a custom theme for consistent styling +custom_theme = Theme( + { + "info": "cyan", + "success": "green", + "warning": "yellow", + "error": "red bold", + "highlight": "magenta", + "url": "blue 
underline", + } +) -class ElementAction(BaseModel): - action: str - id: int - arguments: list[str] +# Create a Rich console instance with our theme +console = Console(theme=custom_theme) -async def main(): - # Display header - console.print( - "\n", - Panel.fit( - "[light_gray]New Stagehand 🤘 Python Async Test[/]", - border_style="green", - padding=(1, 10), - ), +load_dotenv() + +console.print( + Panel.fit( + "[yellow]Logging Levels:[/]\n" + "[white]- Set [bold]verbose=0[/] for errors (ERROR)[/]\n" + "[white]- Set [bold]verbose=1[/] for minimal logs (INFO)[/]\n" + "[white]- Set [bold]verbose=2[/] for medium logs (WARNING)[/]\n" + "[white]- Set [bold]verbose=3[/] for detailed logs (DEBUG)[/]", + title="Verbosity Options", + border_style="blue", ) - - # Create configuration +) + +async def main(): + # Build a unified configuration object for Stagehand config = StagehandConfig( + env="BROWSERBASE", api_key=os.getenv("BROWSERBASE_API_KEY"), project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - model_name="google/gemini-2.5-flash-preview-04-17", # todo - unify gemini/google model names - model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, # this works locally even if there is a model provider mismatch - verbose=3, + headless=False, + dom_settle_timeout_ms=3000, + model_name="google/gemini-2.0-flash", + self_heal=True, + wait_for_captcha_solves=True, + system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", + model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, + # Use verbose=2 for medium-detail logs (1=minimal, 3=debug) + verbose=2, ) - - # Initialize async client - stagehand = Stagehand( - env=os.getenv("STAGEHAND_ENV"), - config=config, - api_url=os.getenv("STAGEHAND_SERVER_URL"), + + stagehand = Stagehand(config, + api_url=os.getenv("STAGEHAND_SERVER_URL"), + env=os.getenv("STAGEHAND_ENV")) + + # Initialize - this creates a new session automatically. 
+ console.print("\nšŸš€ [info]Initializing Stagehand...[/]") + await stagehand.init() + page = stagehand.page + console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}") + console.print( + f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]" ) + + await asyncio.sleep(2) + + console.print("\nā–¶ļø [highlight] Navigating[/] to Google") + await page.goto("https://google.com/") + console.print("āœ… [success]Navigated to Google[/]") + + console.print("\nā–¶ļø [highlight] Clicking[/] on About link") + # Click on the "About" link using Playwright + await page.get_by_role("link", name="About", exact=True).click() + console.print("āœ… [success]Clicked on About link[/]") + + await asyncio.sleep(2) + console.print("\nā–¶ļø [highlight] Navigating[/] back to Google") + await page.goto("https://google.com/") + console.print("āœ… [success]Navigated back to Google[/]") + + console.print("\nā–¶ļø [highlight] Performing action:[/] search for openai") + await page.act("search for openai") + await page.keyboard.press("Enter") + console.print("āœ… [success]Performing Action:[/] Action completed successfully") - try: - # Initialize the client - await stagehand.init() - console.print("[success]āœ“ Successfully initialized Stagehand async client[/]") - console.print(f"[info]Environment: {stagehand.env}[/]") - console.print(f"[info]LLM Client Available: {stagehand.llm is not None}[/]") - - # Navigate to AIgrant (as in the original test) - await stagehand.page.goto("https://www.aigrant.com") - console.print("[success]āœ“ Navigated to AIgrant[/]") - await asyncio.sleep(2) - - # Get accessibility tree - tree = await get_accessibility_tree(stagehand.page, stagehand.logger) - console.print("[success]āœ“ Extracted accessibility tree[/]") - with open("../tree.txt", "w") as f: - f.write(tree.get("simplified")) - - print("ID to URL mapping:", tree.get("idToUrl")) - print("IFrames:", tree.get("iframes")) - - # Click the "Get 
Started" button - await stagehand.page.act("click the button with text 'Get Started'") - console.print("[success]āœ“ Clicked 'Get Started' button[/]") - - # Observe the button - await stagehand.page.observe("the button with text 'Get Started'") - console.print("[success]āœ“ Observed 'Get Started' button[/]") - - # Extract companies using schema - extract_options = ExtractOptions( - instruction="Extract the names and URLs of up to 5 companies mentioned on this page", - schema_definition=Companies - ) - - extract_result = await stagehand.page.extract(extract_options) - console.print("[success]āœ“ Extracted companies data[/]") - - # Display results - print("Extract result:", extract_result) - print("Extract result data:", extract_result.data if hasattr(extract_result, 'data') else 'No data field') - - # Parse the result into the Companies model - companies_data = None - - # Handle different result formats between LOCAL and BROWSERBASE - if hasattr(extract_result, 'data') and extract_result.data: - # BROWSERBASE mode - data is in the 'data' field - try: - raw_data = extract_result.data - console.print(f"[info]Raw extract data: {raw_data}[/]") - - # Check if the data needs URL resolution from ID mapping - if isinstance(raw_data, dict) and 'companies' in raw_data: - id_to_url = tree.get("idToUrl", {}) - for company in raw_data['companies']: - if 'url' in company and isinstance(company['url'], str): - # Check if URL is just an ID that needs to be resolved - if company['url'].isdigit() and company['url'] in id_to_url: - company['url'] = id_to_url[company['url']] - console.print(f"[success]āœ“ Resolved URL for {company['name']}: {company['url']}[/]") - - companies_data = Companies.model_validate(raw_data) - console.print("[success]āœ“ Successfully parsed extract result into Companies model[/]") - except Exception as e: - console.print(f"[error]Failed to parse extract result: {e}[/]") - print("Raw data:", extract_result.data) - elif hasattr(extract_result, 'companies'): - # 
LOCAL mode - companies field is directly available - try: - companies_data = Companies.model_validate(extract_result.model_dump()) - console.print("[success]āœ“ Successfully parsed extract result into Companies model[/]") - except Exception as e: - console.print(f"[error]Failed to parse extract result: {e}[/]") - print("Raw companies data:", extract_result.companies) - - print("\nExtracted Companies:") - if companies_data and hasattr(companies_data, "companies"): - for idx, company in enumerate(companies_data.companies, 1): - print(f"{idx}. {company.name}: {company.url}") - else: - print("No companies were found in the extraction result") - - # XPath click - await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click() - await stagehand.page.wait_for_load_state('networkidle') - console.print("[success]āœ“ Clicked element using XPath[/]") - - # Open a new page with Google - console.print("\n[info]Creating a new page...[/]") - new_page = await stagehand.context.new_page() - await new_page.goto("https://www.google.com") - console.print("[success]āœ“ Opened Google in a new page[/]") - - # Get accessibility tree for the new page - tree = await get_accessibility_tree(new_page, stagehand.logger) - with open("../tree.txt", "w") as f: - f.write(tree.get("simplified")) - console.print("[success]āœ“ Extracted accessibility tree for new page[/]") - - # Try clicking Get Started button on Google - await new_page.act("click the button with text 'Get Started'") - - # Only use LLM directly if in LOCAL mode - if stagehand.llm is not None: - console.print("[info]LLM client available - using direct LLM call[/]") - - # Use LLM to analyze the page - response = stagehand.llm.create_response( - messages=[ - { - "role": "system", - "content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. 
The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions." - }, - { - "role": "user", - "content": [ - { - "type": "text", - "text": f"fill the search bar with the text 'Hello'\nPage Tree:\n{tree.get('simplified')}" - } - ] - } - ], - model="gemini/gemini-2.5-flash-preview-04-17", - response_format=ElementAction, - ) - - action = ElementAction.model_validate_json(response.choices[0].message.content) - console.print(f"[success]āœ“ LLM identified element ID: {action.id}[/]") - - # Test CDP functionality - args = {"backendNodeId": action.id} - result = await new_page.send_cdp("DOM.resolveNode", args) - object_info = result.get("object") - print(object_info) - - xpath = await get_xpath_by_resolved_object_id(await new_page.get_cdp_client(), object_info["objectId"]) - console.print(f"[success]āœ“ Retrieved XPath: {xpath}[/]") - - # Interact with the element - if xpath: - await new_page.locator(f"xpath={xpath}").click() - await new_page.locator(f"xpath={xpath}").fill(action.arguments[0]) - console.print("[success]āœ“ Filled search bar with 'Hello'[/]") - else: - print("No xpath found") - else: - console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]") - # Alternative: use page.observe to find the search bar - observe_result = await new_page.observe("the search bar or search input field") - console.print(f"[info]Observed search elements: {observe_result}[/]") - - # Use page.act to fill the search bar - try: - await new_page.act("fill the search bar with 'Hello'") - console.print("[success]āœ“ Filled search bar using act()[/]") - except Exception as e: - console.print(f"[warning]Could not fill search bar: {e}[/]") - - # Final test summary - console.print("\n[success]All async tests completed successfully![/]") - - except Exception as e: - console.print(f"[error]Error during testing: {str(e)}[/]") 
- import traceback - traceback.print_exc() - raise - finally: - # Close the client - # wait for 5 seconds - await asyncio.sleep(5) - await stagehand.close() - console.print("[info]Stagehand async client closed[/]") + await asyncio.sleep(2) + + console.print("\nā–¶ļø [highlight] Observing page[/] for news button") + observed = await page.observe("find all articles") + + if len(observed) > 0: + element = observed[0] + console.print("āœ… [success]Found element:[/] News button") + console.print("\nā–¶ļø [highlight] Performing action on observed element:") + console.print(element) + await page.act(element) + console.print("āœ… [success]Performing Action:[/] Action completed successfully") + + else: + console.print("āŒ [error]No element found[/]") + + console.print("\nā–¶ļø [highlight] Extracting[/] first search result") + data = await page.extract("extract the first result from the search") + console.print("šŸ“Š [info]Extracted data:[/]") + console.print_json(data=data.model_dump()) + + # Close the session + console.print("\nā¹ļø [warning]Closing session...[/]") + await stagehand.close() + console.print("āœ… [success]Session closed successfully![/]") + console.rule("[bold]End of Example[/]") + if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + # Add a fancy header + console.print( + "\n", + Panel.fit( + "[light_gray]Stagehand 🤘 Python Example[/]", + border_style="green", + padding=(1, 10), + ), + ) + asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 7080ba31..ce941c87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,9 @@ dependencies = [ "playwright>=1.42.1", "requests>=2.31.0", "browserbase>=1.4.0", + "anthropic>=0.52.2", + "openai>=1.83.0", + "litellm>=1.72.0" ] [project.optional-dependencies] From 33ef876e593f7b2a477b1fb1f90406703b33878b Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 3 Jun 2025 08:09:30 -0400 Subject: [PATCH 06/11] do not require bb key for local runs --- 
stagehand/client.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/stagehand/client.py b/stagehand/client.py index 296b60a6..e5df0413 100644 --- a/stagehand/client.py +++ b/stagehand/client.py @@ -122,7 +122,25 @@ def __init__( self.wait_for_captcha_solves = self.config.wait_for_captcha_solves self.system_prompt = self.config.system_prompt self.verbose = self.config.verbose - self.env = self.config.env.upper() if self.config.env else "BROWSERBASE" + + # Smart environment detection + if self.config.env: + self.env = self.config.env.upper() + else: + # Auto-detect environment based on available configuration + has_browserbase_config = bool(self.browserbase_api_key and self.browserbase_project_id) + has_local_config = bool(self.config.local_browser_launch_options) + + if has_local_config and not has_browserbase_config: + # Local browser options specified but no Browserbase config + self.env = "LOCAL" + elif not has_browserbase_config and not has_local_config: + # No configuration specified, default to LOCAL for easier local development + self.env = "LOCAL" + else: + # Default to BROWSERBASE if Browserbase config is available + self.env = "BROWSERBASE" + self.local_browser_launch_options = ( self.config.local_browser_launch_options or {} ) @@ -230,7 +248,10 @@ def cleanup_handler(sig, frame): return self.__class__._cleanup_called = True - print(f"\n[{signal.Signals(sig).name}] received. Ending Browserbase session...") + if self.env == "BROWSERBASE": + print(f"\n[{signal.Signals(sig).name}] received. Ending Browserbase session...") + else: + print(f"\n[{signal.Signals(sig).name}] received. 
Cleaning up Stagehand resources...") try: # Try to get the current event loop @@ -269,9 +290,15 @@ async def _async_cleanup(self): """Async cleanup method called from signal handler.""" try: await self.close() - print(f"Session {self.session_id} ended successfully") + if self.env == "BROWSERBASE" and self.session_id: + print(f"Session {self.session_id} ended successfully") + else: + print("Stagehand resources cleaned up successfully") except Exception as e: - print(f"Error ending Browserbase session: {str(e)}") + if self.env == "BROWSERBASE": + print(f"Error ending Browserbase session: {str(e)}") + else: + print(f"Error cleaning up Stagehand resources: {str(e)}") finally: # Force exit after cleanup completes (or fails) # Use os._exit to avoid any further Python cleanup that might hang From 37d55100d917de1366d14bf69ec9cd998d89cedf Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 3 Jun 2025 08:15:56 -0400 Subject: [PATCH 07/11] update example --- examples/example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/example.py b/examples/example.py index c145c386..dda6d9da 100644 --- a/examples/example.py +++ b/examples/example.py @@ -45,7 +45,7 @@ async def main(): console.print( "\n", Panel.fit( - "[light_gray]New Stagehand 🤘 Python Async Test[/]", + "[light_gray]New Stagehand 🤘 Python Test[/]", border_style="green", padding=(1, 10), ), @@ -230,7 +230,7 @@ async def main(): console.print(f"[warning]Could not fill search bar: {e}[/]") # Final test summary - console.print("\n[success]All async tests completed successfully![/]") + console.print("\n[success]All tests completed successfully![/]") except Exception as e: console.print(f"[error]Error during testing: {str(e)}[/]") From 8ca77515d8b615e7b1d746f19f229d9cefe10f1c Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 3 Jun 2025 08:17:07 -0400 Subject: [PATCH 08/11] format --- stagehand/client.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 
deletions(-) diff --git a/stagehand/client.py b/stagehand/client.py index e5df0413..92c43ad0 100644 --- a/stagehand/client.py +++ b/stagehand/client.py @@ -2,9 +2,9 @@ import json import os import shutil -import tempfile import signal import sys +import tempfile import time from pathlib import Path from typing import Any, Literal, Optional @@ -47,7 +47,7 @@ class Stagehand: # Dictionary to store one lock per session_id _session_locks = {} - + # Flag to track if cleanup has been called _cleanup_called = False @@ -122,15 +122,17 @@ def __init__( self.wait_for_captcha_solves = self.config.wait_for_captcha_solves self.system_prompt = self.config.system_prompt self.verbose = self.config.verbose - + # Smart environment detection if self.config.env: self.env = self.config.env.upper() else: # Auto-detect environment based on available configuration - has_browserbase_config = bool(self.browserbase_api_key and self.browserbase_project_id) + has_browserbase_config = bool( + self.browserbase_api_key and self.browserbase_project_id + ) has_local_config = bool(self.config.local_browser_launch_options) - + if has_local_config and not has_browserbase_config: # Local browser options specified but no Browserbase config self.env = "LOCAL" @@ -140,7 +142,7 @@ def __init__( else: # Default to BROWSERBASE if Browserbase config is available self.env = "BROWSERBASE" - + self.local_browser_launch_options = ( self.config.local_browser_launch_options or {} ) @@ -211,7 +213,7 @@ def __init__( raise ValueError( "browserbase_project_id is required for BROWSERBASE env with existing session_id (or set BROWSERBASE_PROJECT_ID in env)." 
) - + # Register signal handlers for graceful shutdown self._register_signal_handlers() @@ -242,6 +244,7 @@ def __init__( def _register_signal_handlers(self): """Register signal handlers for SIGINT and SIGTERM to ensure proper cleanup.""" + def cleanup_handler(sig, frame): # Prevent multiple cleanup calls if self.__class__._cleanup_called: @@ -249,9 +252,13 @@ def cleanup_handler(sig, frame): self.__class__._cleanup_called = True if self.env == "BROWSERBASE": - print(f"\n[{signal.Signals(sig).name}] received. Ending Browserbase session...") + print( + f"\n[{signal.Signals(sig).name}] received. Ending Browserbase session..." + ) else: - print(f"\n[{signal.Signals(sig).name}] received. Cleaning up Stagehand resources...") + print( + f"\n[{signal.Signals(sig).name}] received. Cleaning up Stagehand resources..." + ) try: # Try to get the current event loop @@ -275,9 +282,9 @@ def schedule_cleanup(): # Shield the task to prevent it from being cancelled shielded = asyncio.shield(task) # We don't need to await here since we're in call_soon_threadsafe - + loop.call_soon_threadsafe(schedule_cleanup) - + except Exception as e: print(f"Error during signal cleanup: {str(e)}") sys.exit(1) From 93a901126e16d077385f37c3044671a263202574 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 3 Jun 2025 08:19:36 -0400 Subject: [PATCH 09/11] formatting --- stagehand/client.py | 2 +- stagehand/utils.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/stagehand/client.py b/stagehand/client.py index 92c43ad0..1acb872a 100644 --- a/stagehand/client.py +++ b/stagehand/client.py @@ -280,7 +280,7 @@ def cleanup_handler(sig, frame): def schedule_cleanup(): task = asyncio.create_task(self._async_cleanup()) # Shield the task to prevent it from being cancelled - shielded = asyncio.shield(task) + asyncio.shield(task) # We don't need to await here since we're in call_soon_threadsafe loop.call_soon_threadsafe(schedule_cleanup) diff --git a/stagehand/utils.py 
b/stagehand/utils.py index 9ef5278d..a94161e3 100644 --- a/stagehand/utils.py +++ b/stagehand/utils.py @@ -840,7 +840,7 @@ def transform_url_strings_to_ids(schema): return transform_model(schema) -def transform_model(model_cls, path=[]): +def transform_model(model_cls, path=None): """ Recursively transforms a Pydantic model by replacing URL fields with numeric fields. @@ -851,6 +851,9 @@ def transform_model(model_cls, path=[]): Returns: Tuple of (transformed_model_cls, url_paths) """ + if path is None: + path = [] + # Get model fields based on Pydantic version try: # Pydantic V2 approach From a8fe211a79ab8963012959124a7a27449cb72fa1 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 3 Jun 2025 08:45:14 -0400 Subject: [PATCH 10/11] format; git commit -m ;format --- stagehand/client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/stagehand/client.py b/stagehand/client.py index abd71f83..d771f671 100644 --- a/stagehand/client.py +++ b/stagehand/client.py @@ -1,9 +1,7 @@ import asyncio import os -import shutil import signal import sys -import tempfile import time from pathlib import Path from typing import Any, Literal, Optional From 9ee80962e16cddbceb9693c17d957ff3428fedeb Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 3 Jun 2025 20:52:41 -0400 Subject: [PATCH 11/11] remove saving the tree --- examples/example.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/examples/example.py b/examples/example.py index dda6d9da..5d089f51 100644 --- a/examples/example.py +++ b/examples/example.py @@ -84,9 +84,7 @@ async def main(): # Get accessibility tree tree = await get_accessibility_tree(stagehand.page, stagehand.logger) console.print("[success]āœ“ Extracted accessibility tree[/]") - with open("../tree.txt", "w") as f: - f.write(tree.get("simplified")) - + print("ID to URL mapping:", tree.get("idToUrl")) print("IFrames:", tree.get("iframes")) @@ -165,8 +163,6 @@ async def main(): # Get accessibility tree for the new page tree = 
await get_accessibility_tree(new_page, stagehand.logger) - with open("../tree.txt", "w") as f: - f.write(tree.get("simplified")) console.print("[success]āœ“ Extracted accessibility tree for new page[/]") # Try clicking Get Started button on Google