1+ """
2+ Regression test for wichita functionality.
3+
4+ This test verifies that combination actions (act + extract) work correctly,
5+ based on the TypeScript wichita evaluation.
6+ """
7+
8+ import os
9+ import pytest
10+ import pytest_asyncio
11+ from pydantic import BaseModel , Field
12+
13+ from stagehand import Stagehand , StagehandConfig
14+ from stagehand .schemas import ExtractOptions
15+
16+
# NOTE: the docstring and the field description are kept verbatim; pydantic can
# surface both in the JSON schema generated from this model.
class BidResults(BaseModel):
    """Schema for bid results extraction"""

    # Required field, typed as free text rather than int: the tests in this
    # module pull the digits out of it themselves before comparing.
    total_results: str = Field(
        ...,
        description="The total number of bids that the search produced",
    )
20+
21+
class TestWichita:
    """Regression test for wichita functionality.

    Both tests drive the same scenario (navigate, act, extract, check the
    count); they differ only in the Stagehand fixture they run against.
    """

    # Page under test and the bid count it is expected to report.
    _BIDS_URL = "https://www.wichitafallstx.gov/Bids.aspx"
    _EXPECTED_TOTAL = 405
    # Allowed absolute deviation from _EXPECTED_TOTAL.
    _TOLERANCE = 10

    @pytest.fixture(scope="class")
    def local_config(self):
        """Configuration for LOCAL mode testing"""
        return StagehandConfig(
            env="LOCAL",
            model_name="gpt-4o-mini",
            headless=True,
            verbose=1,
            dom_settle_timeout_ms=2000,
            # MODEL_API_KEY takes precedence; OPENAI_API_KEY is the fallback.
            model_client_options={
                "apiKey": os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
            },
        )

    @pytest.fixture(scope="class")
    def browserbase_config(self):
        """Configuration for BROWSERBASE mode testing"""
        return StagehandConfig(
            env="BROWSERBASE",
            api_key=os.getenv("BROWSERBASE_API_KEY"),
            project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name="gpt-4o",
            headless=False,
            verbose=2,
            # MODEL_API_KEY takes precedence; OPENAI_API_KEY is the fallback.
            model_client_options={
                "apiKey": os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
            },
        )

    @pytest_asyncio.fixture
    async def local_stagehand(self, local_config):
        """Create a Stagehand instance for LOCAL testing"""
        stagehand = Stagehand(config=local_config)
        await stagehand.init()
        yield stagehand
        # Teardown: runs once the test using this fixture has finished.
        await stagehand.close()

    @pytest_asyncio.fixture
    async def browserbase_stagehand(self, browserbase_config):
        """Create a Stagehand instance for BROWSERBASE testing"""
        # Skip before creating a session when credentials are missing.
        if not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")):
            pytest.skip("Browserbase credentials not available")

        stagehand = Stagehand(config=browserbase_config)
        await stagehand.init()
        yield stagehand
        # Teardown: runs once the test using this fixture has finished.
        await stagehand.close()

    @staticmethod
    def _total_results_text(result):
        """Return the raw ``total_results`` value from an ``extract()`` result.

        Two result shapes are handled, plus a last-resort fallback:

        * an object with a truthy ``data`` attribute, which is validated
          against ``BidResults`` (the original comments describe this as the
          BROWSERBASE format);
        * an object exposing ``total_results`` directly (described as the
          LOCAL format);
        * anything else, which is stringified.
        """
        # TODO - how to unify the extract result handling between LOCAL and BROWSERBASE?
        payload = getattr(result, "data", None)
        if payload:
            return BidResults.model_validate(payload).total_results
        if hasattr(result, "total_results"):
            return result.total_results
        return str(result)

    @staticmethod
    def _parse_count(total_results):
        """Return the integer formed by the digits found in ``total_results``.

        All digit characters are concatenated, so ``"1,234 bids"`` yields
        ``1234``. The value is passed through ``str()`` first so a non-string
        value does not break the digit filter. The test fails with a message
        showing the offending value when no digit is present.
        """
        digits = "".join(filter(str.isdigit, str(total_results)))
        if not digits:
            pytest.fail(f"No digits found in extracted total_results: {total_results!r}")
        return int(digits)

    async def _run_wichita_scenario(self, stagehand):
        """Run the shared scenario against ``stagehand`` and check the bid count.

        Steps:
        - Navigate to the Wichita Falls TX government bids page
        - Click on "Show Closed/Awarded/Cancelled bids"
        - Extract the total number of bids
        - Assert the count is within ``_EXPECTED_TOTAL`` ± ``_TOLERANCE``
        """
        await stagehand.page.goto(self._BIDS_URL)

        # Click to show closed/awarded/cancelled bids
        await stagehand.page.act('Click on "Show Closed/Awarded/Cancelled bids"')

        # Extract the total number of results using schema-based extraction
        extract_options = ExtractOptions(
            instruction="Extract the total number of bids that the search produced.",
            schema_definition=BidResults,
        )
        result = await stagehand.page.extract(extract_options)

        extracted_number = self._parse_count(self._total_results_text(result))

        assert abs(extracted_number - self._EXPECTED_TOTAL) <= self._TOLERANCE, (
            f"Total number of results {extracted_number} is not within the expected range "
            f"{self._EXPECTED_TOTAL} ± {self._TOLERANCE}"
        )

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.local
    async def test_wichita_local(self, local_stagehand):
        """
        Regression test: wichita

        Mirrors the TypeScript wichita evaluation:
        - Navigate to Wichita Falls TX government bids page
        - Click on "Show Closed/Awarded/Cancelled bids"
        - Extract the total number of bids
        - Verify the count is within expected range (405 ± 10)
        """
        await self._run_wichita_scenario(local_stagehand)

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.api
    @pytest.mark.skipif(
        not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")),
        reason="Browserbase credentials not available",
    )
    async def test_wichita_browserbase(self, browserbase_stagehand):
        """
        Regression test: wichita (Browserbase)

        Same test as local but running in Browserbase environment.
        """
        await self._run_wichita_scenario(browserbase_stagehand)
0 commit comments