1
+ """
2
+ Regression test for wichita functionality.
3
+
4
+ This test verifies that combination actions (act + extract) work correctly,
5
+ based on the TypeScript wichita evaluation.
6
+ """
7
+
8
+ import os
9
+ import pytest
10
+ import pytest_asyncio
11
+ from pydantic import BaseModel , Field
12
+
13
+ from stagehand import Stagehand , StagehandConfig
14
+ from stagehand .schemas import ExtractOptions
15
+
16
+
17
class BidResults(BaseModel):
    """Schema for bid results extraction"""

    # Required field (``...`` means no default). It is typed as text rather
    # than int; the tests in this file pull the digits out of the string
    # before comparing it with the expected count.
    total_results: str = Field(
        ...,
        description="The total number of bids that the search produced",
    )
20
+
21
+
22
class TestWichita:
    """Regression test for wichita functionality.

    Both tests drive the same scenario (an ``act`` click followed by a
    schema-based ``extract``) and differ only in which Stagehand fixture
    they run against. The shared scenario lives in
    ``_assert_total_bids_in_range``.
    """

    # Page under test, the bid count the tests expect, and the allowed
    # deviation from that count. Shared by the LOCAL and BROWSERBASE tests.
    _BIDS_URL = "https://www.wichitafallstx.gov/Bids.aspx"
    _EXPECTED_TOTAL = 405
    _TOLERANCE = 10

    @staticmethod
    def _model_api_key():
        """Return the model API key, preferring MODEL_API_KEY over OPENAI_API_KEY.

        Returns None when neither environment variable is set (or both are
        empty), exactly as the inline ``os.getenv(...) or os.getenv(...)`` did.
        """
        return os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")

    @staticmethod
    def _total_results_text(result):
        """Pull the ``total_results`` value out of an ``extract`` result.

        Args:
            result: Whatever ``stagehand.page.extract`` returned.

        Returns:
            The ``total_results`` value, or ``str(result)`` when the result
            has neither a truthy ``data`` attribute nor a ``total_results``
            attribute.
        """
        # TODO - how to unify the extract result handling between LOCAL and BROWSERBASE?
        wrapped = getattr(result, "data", None)
        if wrapped:
            # Wrapper exposing ``.data`` (described in the original notes as
            # the BROWSERBASE ExtractResult shape): validate it against the schema.
            return BidResults.model_validate(wrapped).total_results
        if hasattr(result, "total_results"):
            # The result already exposes the field directly (described in the
            # original notes as the LOCAL shape: the Pydantic model instance).
            return result.total_results
        # Last resort: use the textual form and let the digit parser cope.
        return str(result)

    @staticmethod
    def _parse_count(text):
        """Concatenate every decimal digit in ``text`` and return it as an int.

        Fails with a descriptive assertion (instead of a bare ``ValueError``
        from ``int('')``) when ``text`` contains no digits at all.
        """
        # ``isdecimal`` keeps only characters ``int()`` can actually parse;
        # ``str(...)`` guards against a non-string value reaching ``filter``.
        digits = "".join(filter(str.isdecimal, str(text)))
        assert digits, f"No digits found in extracted total_results: {text!r}"
        return int(digits)

    async def _assert_total_bids_in_range(self, stagehand):
        """Run the wichita scenario on ``stagehand`` and assert on the bid count.

        Steps: open the bids page, click "Show Closed/Awarded/Cancelled bids",
        extract the total number of bids with the ``BidResults`` schema, and
        assert the parsed number is within ``_TOLERANCE`` of ``_EXPECTED_TOTAL``.
        """
        await stagehand.page.goto(self._BIDS_URL)

        # Click to show closed/awarded/cancelled bids
        await stagehand.page.act('Click on "Show Closed/Awarded/Cancelled bids"')

        # Extract the total number of results using schema-based extraction
        extract_options = ExtractOptions(
            instruction="Extract the total number of bids that the search produced.",
            schema_definition=BidResults,
        )
        result = await stagehand.page.extract(extract_options)

        expected_number = self._EXPECTED_TOTAL
        extracted_number = self._parse_count(self._total_results_text(result))

        # Inclusive on both ends: expected - tolerance <= extracted <= expected + tolerance
        assert abs(extracted_number - expected_number) <= self._TOLERANCE, (
            f"Total number of results {extracted_number} is not within the expected range "
            f"{expected_number} ± {self._TOLERANCE}"
        )

    @pytest.fixture(scope="class")
    def local_config(self):
        """Configuration for LOCAL mode testing"""
        return StagehandConfig(
            env="LOCAL",
            model_name="gpt-4o-mini",
            headless=True,
            verbose=1,
            dom_settle_timeout_ms=2000,
            model_client_options={"apiKey": self._model_api_key()},
        )

    @pytest.fixture(scope="class")
    def browserbase_config(self):
        """Configuration for BROWSERBASE mode testing"""
        return StagehandConfig(
            env="BROWSERBASE",
            api_key=os.getenv("BROWSERBASE_API_KEY"),
            project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name="gpt-4o",
            headless=False,
            verbose=2,
            model_client_options={"apiKey": self._model_api_key()},
        )

    @pytest_asyncio.fixture
    async def local_stagehand(self, local_config):
        """Create a Stagehand instance for LOCAL testing"""
        stagehand = Stagehand(config=local_config)
        await stagehand.init()
        yield stagehand
        # Teardown: runs after the test that requested this fixture finishes.
        await stagehand.close()

    @pytest_asyncio.fixture
    async def browserbase_stagehand(self, browserbase_config):
        """Create a Stagehand instance for BROWSERBASE testing"""
        # Skip before creating a session when either credential is missing.
        if not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")):
            pytest.skip("Browserbase credentials not available")

        stagehand = Stagehand(config=browserbase_config)
        await stagehand.init()
        yield stagehand
        # Teardown: runs after the test that requested this fixture finishes.
        await stagehand.close()

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.local
    async def test_wichita_local(self, local_stagehand):
        """
        Regression test: wichita

        Mirrors the TypeScript wichita evaluation:
        - Navigate to Wichita Falls TX government bids page
        - Click on "Show Closed/Awarded/Cancelled bids"
        - Extract the total number of bids
        - Verify the count is within expected range (405 ± 10)
        """
        await self._assert_total_bids_in_range(local_stagehand)

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.api
    @pytest.mark.skipif(
        not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")),
        reason="Browserbase credentials not available",
    )
    async def test_wichita_browserbase(self, browserbase_stagehand):
        """
        Regression test: wichita (Browserbase)

        Same test as local but running in Browserbase environment.
        """
        await self._assert_total_bids_in_range(browserbase_stagehand)
0 commit comments