-
Notifications
You must be signed in to change notification settings - Fork 62
first regression test #88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+566
−0
Merged
Changes from 4 commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
2018b03
first regression test
filip-michalsky 896a7cd
add regression tests to the CI (optional based on label)
filip-michalsky f5366ff
update test
filip-michalsky a8811fe
need to add timeoutMs to Act in local mode
filip-michalsky 297d154
add comment for todo
filip-michalsky 694c4fb
second regression test
filip-michalsky 252e47e
add ionwave test
filip-michalsky 6800683
add one extract test
filip-michalsky 1cb9463
tests added, have some TODOs
filip-michalsky fb13bc6
format check
filip-michalsky f253269
Merge branch 'main' into fm/stg-489-add-regression-tests
filip-michalsky d3a2bcf
update workflow
filip-michalsky be1dfe4
revert handler
filip-michalsky fb785aa
update
filip-michalsky bb2a924
update tests
filip-michalsky 5214c1a
update comment
filip-michalsky ac43e5a
Merge branch 'main' into fm/stg-489-add-regression-tests
filip-michalsky aa755ed
Merge branch 'main' into fm/stg-489-add-regression-tests
filip-michalsky 5b617b2
resolved todos
filip-michalsky cbbab3d
add changeset
filip-michalsky 2bc572a
removed superfluous tests
filip-michalsky File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
filip-michalsky marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
""" | ||
Regression test for act timeout functionality. | ||
|
||
This test verifies that the timeout mechanism works correctly for act operations, | ||
based on the TypeScript expect_act_timeout evaluation. | ||
""" | ||
|
||
import os | ||
import pytest | ||
import pytest_asyncio | ||
|
||
from stagehand import Stagehand, StagehandConfig | ||
|
||
|
||
class TestActTimeout:
    """Regression test for act timeout functionality.

    The same scenario is run against a LOCAL and a BROWSERBASE Stagehand
    session: navigate to the docs site, attempt an action with a 1 second
    timeout and expect the action to report failure. The scenario lives in
    ``_assert_act_fails`` so both environments exercise identical steps.
    """

    # Page the scenario navigates to before attempting the action.
    TARGET_URL = "https://docs.stagehand.dev"
    # Natural-language instruction handed to ``page.act``.
    ACT_INSTRUCTION = "search for 'Stagehand'"
    # Value passed as ``timeout_ms`` to ``page.act`` (1 second).
    ACT_TIMEOUT_MS = 1000

    @staticmethod
    def _model_client_options():
        """Return the model client options shared by both configurations.

        ``MODEL_API_KEY`` takes precedence; ``OPENAI_API_KEY`` is the fallback.
        The ``apiKey`` value is ``None`` when neither variable is set.
        """
        return {"apiKey": os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")}

    async def _assert_act_fails(self, stagehand):
        """Run the timeout scenario on ``stagehand`` and assert the act failed.

        Args:
            stagehand: An initialised Stagehand instance exposing ``page``.
        """
        await stagehand.page.goto(self.TARGET_URL)

        result = await stagehand.page.act(
            self.ACT_INSTRUCTION,
            timeout_ms=self.ACT_TIMEOUT_MS,
        )

        # Test passes if the action failed (due to timeout or element not found)
        # This mirrors the TypeScript: _success: !result.success
        assert not result.success, "Action should have failed due to timeout or missing element"

    @pytest.fixture(scope="class")
    def local_config(self):
        """Configuration for LOCAL mode testing"""
        return StagehandConfig(
            env="LOCAL",
            model_name="gpt-4o-mini",
            headless=True,
            verbose=1,
            dom_settle_timeout_ms=2000,
            model_client_options=self._model_client_options(),
        )

    @pytest.fixture(scope="class")
    def browserbase_config(self):
        """Configuration for BROWSERBASE mode testing"""
        return StagehandConfig(
            env="BROWSERBASE",
            api_key=os.getenv("BROWSERBASE_API_KEY"),
            project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name="gpt-4o",
            headless=False,
            verbose=2,
            model_client_options=self._model_client_options(),
        )

    @pytest_asyncio.fixture
    async def local_stagehand(self, local_config):
        """Create a Stagehand instance for LOCAL testing"""
        stagehand = Stagehand(config=local_config)
        await stagehand.init()
        yield stagehand
        # Runs as fixture teardown once the test using this fixture finishes.
        await stagehand.close()

    @pytest_asyncio.fixture
    async def browserbase_stagehand(self, browserbase_config):
        """Create a Stagehand instance for BROWSERBASE testing"""
        # Skip before creating a session when credentials are not configured.
        if not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")):
            pytest.skip("Browserbase credentials not available")

        stagehand = Stagehand(config=browserbase_config)
        await stagehand.init()
        yield stagehand
        # Runs as fixture teardown once the test using this fixture finishes.
        await stagehand.close()

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.local
    async def test_expect_act_timeout_local(self, local_stagehand):
        """
        Regression test: expect_act_timeout

        Mirrors the TypeScript expect_act_timeout evaluation:
        - Navigate to docs.stagehand.dev
        - Attempt action with 1 second timeout
        - Expect the action to fail due to timeout
        """
        await self._assert_act_fails(local_stagehand)

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.api
    @pytest.mark.skipif(
        not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")),
        reason="Browserbase credentials not available"
    )
    async def test_expect_act_timeout_browserbase(self, browserbase_stagehand):
        """
        Regression test: expect_act_timeout (Browserbase)

        Same test as local but running in Browserbase environment.
        """
        await self._assert_act_fails(browserbase_stagehand)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.