Skip to content

Commit 6800683

Browse files
add one extract test
1 parent 252e47e commit 6800683

File tree

1 file changed

+180
-0
lines changed

1 file changed

+180
-0
lines changed

tests/regression/test_wichita.py

Lines changed: 180 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,180 @@
1+
"""
2+
Regression test for wichita functionality.
3+
4+
This test verifies that combination actions (act + extract) work correctly,
5+
based on the TypeScript wichita evaluation.
6+
"""
7+
8+
import os
9+
import pytest
10+
import pytest_asyncio
11+
from pydantic import BaseModel, Field
12+
13+
from stagehand import Stagehand, StagehandConfig
14+
from stagehand.schemas import ExtractOptions
15+
16+
17+
class BidResults(BaseModel):
    """Schema for bid results extraction"""

    # The count is modelled as free text (str), not int; the tests pull the
    # digits out of this string themselves before comparing.
    total_results: str = Field(
        ...,
        description="The total number of bids that the search produced",
    )
20+
21+
22+
class TestWichita:
    """Regression test for wichita functionality.

    Exercises a combined ``act`` + ``extract`` flow against the Wichita Falls
    bids page, once with a LOCAL Stagehand configuration and once with a
    BROWSERBASE configuration. Both tests share a single scenario helper so
    the two environments are checked against exactly the same steps.
    """

    # Page under test and the bid count the regression is pinned to.
    BIDS_URL = "https://www.wichitafallstx.gov/Bids.aspx"
    EXPECTED_BID_COUNT = 405
    # The count is compared as EXPECTED_BID_COUNT +/- this tolerance.
    BID_COUNT_TOLERANCE = 10

    @pytest.fixture(scope="class")
    def local_config(self):
        """Return the StagehandConfig used for LOCAL mode testing."""
        return StagehandConfig(
            env="LOCAL",
            model_name="gpt-4o-mini",
            headless=True,
            verbose=1,
            dom_settle_timeout_ms=2000,
            model_client_options={"apiKey": os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")},
        )

    @pytest.fixture(scope="class")
    def browserbase_config(self):
        """Return the StagehandConfig used for BROWSERBASE mode testing."""
        return StagehandConfig(
            env="BROWSERBASE",
            api_key=os.getenv("BROWSERBASE_API_KEY"),
            project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name="gpt-4o",
            headless=False,
            verbose=2,
            model_client_options={"apiKey": os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")},
        )

    @pytest_asyncio.fixture
    async def local_stagehand(self, local_config):
        """Yield an initialised Stagehand for LOCAL testing; close it afterwards."""
        stagehand = Stagehand(config=local_config)
        await stagehand.init()
        yield stagehand
        await stagehand.close()

    @pytest_asyncio.fixture
    async def browserbase_stagehand(self, browserbase_config):
        """Yield an initialised Stagehand for BROWSERBASE testing; close it afterwards.

        Skips the requesting test when the Browserbase credentials are not
        present in the environment.
        """
        if not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")):
            pytest.skip("Browserbase credentials not available")

        stagehand = Stagehand(config=browserbase_config)
        await stagehand.init()
        yield stagehand
        await stagehand.close()

    @staticmethod
    def _total_results_from(result):
        """Return the raw ``total_results`` value from an extract result.

        TODO: unify the extract result handling between LOCAL and BROWSERBASE.
        Per the original note in this test, BROWSERBASE returns a wrapper
        exposing the payload on ``.data`` while LOCAL returns the model
        instance directly; both shapes are accepted here.
        """
        payload = getattr(result, "data", None)
        if payload:
            # Wrapper format: validate the payload against the schema.
            return BidResults.model_validate(payload).total_results
        if hasattr(result, "total_results"):
            # Direct format: the result already carries the field.
            return result.total_results
        # Unknown shape: fall back to the textual form of the result.
        return str(result)

    @staticmethod
    def _parse_bid_count(total_results):
        """Return the integer formed by the decimal digits in ``total_results``.

        All decimal digits are concatenated, so a thousands separator such as
        ``"1,234"`` parses as ``1234``. The value is coerced to ``str`` first
        so a numeric result does not raise ``TypeError``. ``isdecimal`` is
        used rather than ``isdigit`` because ``int()`` rejects some characters
        (e.g. superscripts) that ``isdigit`` accepts.

        Fails the test with a descriptive message when no digits are present,
        instead of surfacing a bare ``ValueError`` from ``int('')``.
        """
        digits = "".join(ch for ch in str(total_results) if ch.isdecimal())
        assert digits, f"No digits found in extracted total_results: {total_results!r}"
        return int(digits)

    async def _run_wichita_scenario(self, stagehand):
        """Run the shared wichita steps against ``stagehand`` and assert the count.

        Steps:
        - Navigate to the Wichita Falls TX government bids page
        - Click on "Show Closed/Awarded/Cancelled bids"
        - Extract the total number of bids
        - Verify the count is within EXPECTED_BID_COUNT +/- BID_COUNT_TOLERANCE
        """
        await stagehand.page.goto(self.BIDS_URL)

        # Click to show closed/awarded/cancelled bids
        await stagehand.page.act('Click on "Show Closed/Awarded/Cancelled bids"')

        # Extract the total number of results using schema-based extraction
        extract_options = ExtractOptions(
            instruction="Extract the total number of bids that the search produced.",
            schema_definition=BidResults,
        )
        result = await stagehand.page.extract(extract_options)

        total_results = self._total_results_from(result)
        extracted_number = self._parse_bid_count(total_results)

        expected_number = self.EXPECTED_BID_COUNT
        tolerance = self.BID_COUNT_TOLERANCE
        is_within_range = (
            expected_number - tolerance <= extracted_number <= expected_number + tolerance
        )

        assert is_within_range, (
            f"Total number of results {extracted_number} is not within the expected range "
            f"{expected_number} ± {tolerance}"
        )

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.local
    async def test_wichita_local(self, local_stagehand):
        """
        Regression test: wichita

        Mirrors the TypeScript wichita evaluation:
        - Navigate to Wichita Falls TX government bids page
        - Click on "Show Closed/Awarded/Cancelled bids"
        - Extract the total number of bids
        - Verify the count is within expected range (405 ± 10)
        """
        await self._run_wichita_scenario(local_stagehand)

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.api
    @pytest.mark.skipif(
        not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")),
        reason="Browserbase credentials not available"
    )
    async def test_wichita_browserbase(self, browserbase_stagehand):
        """
        Regression test: wichita (Browserbase)

        Same test as local but running in Browserbase environment.
        """
        await self._run_wichita_scenario(browserbase_stagehand)

0 commit comments

Comments
 (0)