browserbase · filip-michalsky · Aug 1, 2025 · Jun 12, 2025 · Jun 12, 2025 · Jun 12, 2025
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -187,6 +187,76 @@ jobs:
         name: smoke-test-results
         path: junit-smoke.xml
 
+  # Regression suite: runs only when the pull request carries the
+  # 'test-regression' or 'regression' label, and waits on the test-unit job.
+  test-regression:
+    name: Regression Tests
+    runs-on: ubuntu-latest
+    needs: test-unit
+    if: |
+      contains(github.event.pull_request.labels.*.name, 'test-regression') ||
+      contains(github.event.pull_request.labels.*.name, 'regression')
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.11"
+
+    # xvfb supplies the virtual display used by xvfb-run in the test step.
+    - name: Install system dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y xvfb
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -e ".[dev]"
+        pip install jsonschema
+        # Install temporary Google GenAI wheel
+        pip install temp/google_genai-1.14.0-py3-none-any.whl
+        playwright install chromium
+        playwright install-deps chromium
+
+    # Selects only tests marked `regression`; pytest stops after 10 failures.
+    - name: Run regression tests
+      run: |
+        xvfb-run -a pytest tests/ -v \
+          --cov=stagehand \
+          --cov-report=xml \
+          --junit-xml=junit-regression.xml \
+          -m "regression" \
+          --tb=short \
+          --maxfail=10
+      env:
+        # Each value falls back to a placeholder when the secret is not set.
+        BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
+        BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
+        MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
+        STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL || 'http://localhost:3000' }}
+
+    - name: Upload regression test results
+      uses: actions/upload-artifact@v4
+      if: always()
+      with:
+        name: regression-test-results
+        path: junit-regression.xml
+
+    - name: Upload coverage data
+      uses: actions/upload-artifact@v4
+      if: always()
+      with:
+        name: coverage-data-regression
+        # .coverage is a dotfile; upload-artifact v4.4+ omits hidden files
+        # unless include-hidden-files is enabled.
+        include-hidden-files: true
+        path: |
+          .coverage
+          coverage.xml
+
   test-e2e:
     name: End-to-End Tests
     runs-on: ubuntu-latest

diff --git a/pytest.ini b/pytest.ini
@@ -12,6 +12,7 @@ markers =
     local: marks tests as local integration tests
     api: marks tests as API integration tests
     e2e: marks tests as end-to-end tests
+    regression: marks tests as regression tests
 
 log_cli = true
 log_cli_level = INFO 
diff --git a/stagehand/handlers/act_handler.py b/stagehand/handlers/act_handler.py
@@ -1,4 +1,5 @@
 import traceback
+import asyncio
 from typing import Any, Optional, Union
 
 from stagehand.handlers.act_handler_utils import (
@@ -46,6 +47,31 @@ async def act(self, options: Union[ActOptions, ObserveResult]) -> ActResult:
                 options, self.stagehand.dom_settle_timeout_ms
             )
 
+        # Read the timeout from options, accepting both the snake_case and camelCase keys.
+        # TODO: temporary shim so callers can pass `timeout_ms`; once the options schema
+        # is migrated from `timeoutMs` to `timeout_ms`, drop the camelCase fallback.
+        timeout_ms = options.get("timeout_ms") or options.get("timeoutMs")
+
+        # If timeout is specified, wrap the entire act operation with asyncio.wait_for
+        if timeout_ms:
+            try:
+                return await asyncio.wait_for(
+                    self._perform_act_with_timeout(options),
+                    timeout=timeout_ms / 1000.0  # Convert ms to seconds
+                )
+            except asyncio.TimeoutError:
+                action_task = options.get("action")
+                return ActResult(
+                    success=False,
+                    message=f"Action timed out after {timeout_ms}ms",
+                    action=action_task,
+                )
+        else:
+            # No timeout specified, use existing behavior
+            return await self._perform_act_with_timeout(options)
+
+    async def _perform_act_with_timeout(self, options) -> ActResult:
+        """Extract the main act logic into a separate method for timeout handling"""
         # Start inference timer if available
         if hasattr(self.stagehand, "start_inference_timer"):
             self.stagehand.start_inference_timer()

diff --git a/tests/regression/__init__.py b/tests/regression/__init__.py
diff --git a/tests/regression/test_act_timeout.py b/tests/regression/test_act_timeout.py
@@ -0,0 +1,118 @@
+"""
+Regression test for act timeout functionality.
+
+This test verifies that the timeout mechanism works correctly for act operations,
+based on the TypeScript expect_act_timeout evaluation.
+"""
+
+import os
+import pytest
+import pytest_asyncio
+
+from stagehand import Stagehand, StagehandConfig
+
+# Page the timeout scenario runs against, and the time budget handed to `act`.
+_DOCS_URL = "https://docs.stagehand.dev"
+_ACT_TIMEOUT_MS = 1000
+
+
+def _has_browserbase_credentials() -> bool:
+    """Return True when both Browserbase environment variables are set and non-empty."""
+    return bool(os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID"))
+
+
+def _model_client_options() -> dict:
+    """Build model client options, preferring MODEL_API_KEY over OPENAI_API_KEY."""
+    return {"apiKey": os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")}
+
+
+async def _assert_act_fails_within_timeout(stagehand) -> None:
+    """
+    Run the shared expect_act_timeout scenario against an initialised Stagehand.
+
+    Navigates to the docs site, attempts a search action with a short timeout, and
+    asserts the result is unsuccessful. This mirrors the TypeScript evaluation's
+    `_success: !result.success`, so any failure (timeout or element not found) passes.
+    """
+    await stagehand.page.goto(_DOCS_URL)
+
+    result = await stagehand.page.act("search for 'Stagehand'", timeout_ms=_ACT_TIMEOUT_MS)
+
+    assert not result.success, "Action should have failed due to timeout or missing element"
+
+
+class TestActTimeout:
+    """Regression test for act timeout functionality"""
+
+    @pytest.fixture(scope="class")
+    def local_config(self):
+        """Configuration for LOCAL mode testing"""
+        return StagehandConfig(
+            env="LOCAL",
+            model_name="gpt-4o-mini",
+            headless=True,
+            verbose=1,
+            dom_settle_timeout_ms=2000,
+            model_client_options=_model_client_options(),
+        )
+
+    @pytest.fixture(scope="class")
+    def browserbase_config(self):
+        """Configuration for BROWSERBASE mode testing"""
+        return StagehandConfig(
+            env="BROWSERBASE",
+            api_key=os.getenv("BROWSERBASE_API_KEY"),
+            project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
+            model_name="gpt-4o",
+            headless=False,
+            verbose=2,
+            model_client_options=_model_client_options(),
+        )
+
+    @pytest_asyncio.fixture
+    async def local_stagehand(self, local_config):
+        """Yield an initialised LOCAL Stagehand and close it after the test."""
+        stagehand = Stagehand(config=local_config)
+        await stagehand.init()
+        yield stagehand
+        await stagehand.close()
+
+    @pytest_asyncio.fixture
+    async def browserbase_stagehand(self, browserbase_config):
+        """Yield an initialised BROWSERBASE Stagehand; skip without credentials."""
+        if not _has_browserbase_credentials():
+            pytest.skip("Browserbase credentials not available")
+
+        stagehand = Stagehand(config=browserbase_config)
+        await stagehand.init()
+        yield stagehand
+        await stagehand.close()
+
+    @pytest.mark.asyncio
+    @pytest.mark.regression
+    @pytest.mark.local
+    async def test_expect_act_timeout_local(self, local_stagehand):
+        """
+        Regression test: expect_act_timeout
+
+        Mirrors the TypeScript expect_act_timeout evaluation:
+        - Navigate to docs.stagehand.dev
+        - Attempt action with 1 second timeout
+        - Expect the action to fail due to timeout
+        """
+        await _assert_act_fails_within_timeout(local_stagehand)
+
+    @pytest.mark.asyncio
+    @pytest.mark.regression
+    @pytest.mark.api
+    @pytest.mark.skipif(
+        not _has_browserbase_credentials(),
+        reason="Browserbase credentials not available",
+    )
+    async def test_expect_act_timeout_browserbase(self, browserbase_stagehand):
+        """
+        Regression test: expect_act_timeout (Browserbase)
+
+        Same test as local but running in Browserbase environment.
+        """
+        await _assert_act_fails_within_timeout(browserbase_stagehand)