Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,68 @@ jobs:
name: smoke-test-results
path: junit-smoke.xml

test-regression:
  # Opt-in regression suite: runs only for pull requests carrying the
  # `test-regression` or `regression` label, and only after `test-unit`.
  name: Regression Tests
  runs-on: ubuntu-latest
  needs: test-unit
  if: |
    contains(github.event.pull_request.labels.*.name, 'test-regression') ||
    contains(github.event.pull_request.labels.*.name, 'regression')

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v4
      with:
        python-version: "3.11"

    # xvfb supplies the virtual display that `xvfb-run` uses in the test step.
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y xvfb

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"
        pip install jsonschema
        # Install temporary Google GenAI wheel
        pip install temp/google_genai-1.14.0-py3-none-any.whl
        playwright install chromium
        playwright install-deps chromium

    # Only tests marked `regression` are selected; the run stops after 10 failures.
    - name: Run regression tests
      run: |
        xvfb-run -a pytest tests/ -v \
          --cov=stagehand \
          --cov-report=xml \
          --junit-xml=junit-regression.xml \
          -m "regression" \
          --tb=short \
          --maxfail=10
      env:
        # Each value falls back to a placeholder when the secret is not configured.
        BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
        BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
        MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
        STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL || 'http://localhost:3000' }}

    # `if: always()` keeps the report uploads running even when pytest fails.
    - name: Upload regression test results
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: regression-test-results
        path: junit-regression.xml

    - name: Upload coverage data
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: coverage-data-regression
        # `.coverage` is a dotfile. upload-artifact v4.4+ skips hidden files by
        # default, so without this flag it is silently left out of the artifact.
        include-hidden-files: true
        path: |
          .coverage
          coverage.xml

test-e2e:
name: End-to-End Tests
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ markers =
local: marks tests as local integration tests
api: marks tests as API integration tests
e2e: marks tests as end-to-end tests
regression: marks tests as regression tests

log_cli = true
log_cli_level = INFO
26 changes: 26 additions & 0 deletions stagehand/handlers/act_handler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import traceback
import asyncio
from typing import Any, Optional, Union

from stagehand.handlers.act_handler_utils import (
Expand Down Expand Up @@ -46,6 +47,31 @@ async def act(self, options: Union[ActOptions, ObserveResult]) -> ActResult:
options, self.stagehand.dom_settle_timeout_ms
)

# Extract the timeout from options, accepting both snake_case (timeout_ms) and camelCase (timeoutMs).
## TODO - accepting both spellings is a temporary fix.
## Options should be updated to use only the timeout_ms field, after which the timeoutMs fallback can be removed.
timeout_ms = options.get("timeout_ms") or options.get("timeoutMs")

# If timeout is specified, wrap the entire act operation with asyncio.wait_for
if timeout_ms:
try:
return await asyncio.wait_for(
self._perform_act_with_timeout(options),
timeout=timeout_ms / 1000.0 # Convert ms to seconds
)
except asyncio.TimeoutError:
action_task = options.get("action")
return ActResult(
success=False,
message=f"Action timed out after {timeout_ms}ms",
action=action_task,
)
else:
# No timeout specified, use existing behavior
return await self._perform_act_with_timeout(options)

async def _perform_act_with_timeout(self, options) -> ActResult:
"""Extract the main act logic into a separate method for timeout handling"""
# Start inference timer if available
if hasattr(self.stagehand, "start_inference_timer"):
self.stagehand.start_inference_timer()
Expand Down
Empty file added tests/regression/__init__.py
Empty file.
110 changes: 110 additions & 0 deletions tests/regression/test_act_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""
Regression test for act timeout functionality.

This test verifies that the timeout mechanism works correctly for act operations,
based on the TypeScript expect_act_timeout evaluation.
"""

import os
import pytest
import pytest_asyncio

from stagehand import Stagehand, StagehandConfig


class TestActTimeout:
    """Regression tests for the ``timeout_ms`` option of ``page.act``.

    The same scenario runs against a LOCAL browser and, when Browserbase
    credentials are present in the environment, against Browserbase.
    """

    @pytest.fixture(scope="class")
    def local_config(self):
        """Return the StagehandConfig used for the LOCAL-mode test."""
        # Prefer MODEL_API_KEY; fall back to OPENAI_API_KEY when it is unset/empty.
        model_key = os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
        return StagehandConfig(
            env="LOCAL",
            model_name="gpt-4o-mini",
            headless=True,
            verbose=1,
            dom_settle_timeout_ms=2000,
            model_client_options={"apiKey": model_key},
        )

    @pytest.fixture(scope="class")
    def browserbase_config(self):
        """Return the StagehandConfig used for the BROWSERBASE-mode test."""
        # Prefer MODEL_API_KEY; fall back to OPENAI_API_KEY when it is unset/empty.
        model_key = os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
        return StagehandConfig(
            env="BROWSERBASE",
            api_key=os.getenv("BROWSERBASE_API_KEY"),
            project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name="gpt-4o",
            headless=False,
            verbose=2,
            model_client_options={"apiKey": model_key},
        )

    @pytest_asyncio.fixture
    async def local_stagehand(self, local_config):
        """Yield an initialised LOCAL Stagehand instance and close it afterwards."""
        client = Stagehand(config=local_config)
        await client.init()
        yield client
        await client.close()

    @pytest_asyncio.fixture
    async def browserbase_stagehand(self, browserbase_config):
        """Yield an initialised BROWSERBASE Stagehand instance and close it afterwards.

        The requesting test is skipped when either Browserbase credential is
        missing from the environment.
        """
        if not os.getenv("BROWSERBASE_API_KEY") or not os.getenv("BROWSERBASE_PROJECT_ID"):
            pytest.skip("Browserbase credentials not available")

        client = Stagehand(config=browserbase_config)
        await client.init()
        yield client
        await client.close()

    async def _assert_act_fails_with_short_timeout(self, stagehand):
        """Run the shared scenario: open the docs site, act with a 1 s timeout, expect failure."""
        await stagehand.page.goto("https://docs.stagehand.dev")

        outcome = await stagehand.page.act(
            "search for 'Stagehand'",
            timeout_ms=1000,  # 1 second timeout
        )

        # The scenario passes when the action did not succeed (timeout or
        # element not found), mirroring the TypeScript `_success: !result.success`.
        assert not outcome.success, "Action should have failed due to timeout or missing element"

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.local
    async def test_expect_act_timeout_local(self, local_stagehand):
        """
        Regression test: expect_act_timeout

        Mirrors the TypeScript expect_act_timeout evaluation:
        - Navigate to docs.stagehand.dev
        - Attempt an action with a 1 second timeout
        - Expect the action to report failure
        """
        await self._assert_act_fails_with_short_timeout(local_stagehand)

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.api
    @pytest.mark.skipif(
        not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")),
        reason="Browserbase credentials not available"
    )
    async def test_expect_act_timeout_browserbase(self, browserbase_stagehand):
        """
        Regression test: expect_act_timeout (Browserbase)

        Same scenario as the local test, run in the Browserbase environment.
        """
        await self._assert_act_fails_with_short_timeout(browserbase_stagehand)
Loading
Loading