@@ -21,7 +21,7 @@ def setUp(self):
2121 # Create a test evaluation file
2222 self .test_eval_file = tempfile .NamedTemporaryFile (mode = "w" , suffix = ".py" , delete = False )
2323
24- # Write test evaluation functions
24+ # Write test evaluation functions with shorter sleep times for faster tests
2525 self .test_eval_file .write (
2626 """
2727import time
@@ -32,12 +32,12 @@ def evaluate(program_path):
3232 code = f.read()
3333
3434 if 'SLEEP_LONG' in code:
35- # Sleep for a long time to trigger timeout
36- time.sleep(30 )
35+ # Sleep for a long time to trigger timeout (reduced for faster tests)
36+ time.sleep(8 )
3737 return {"score": 1.0}
3838 elif 'SLEEP_SHORT' in code:
3939 # Sleep for a short time that should not timeout
40- time.sleep(1 )
40+ time.sleep(0.5 )
4141 return {"score": 0.8}
4242 elif 'RAISE_ERROR' in code:
4343 # Raise an error to trigger retries
@@ -51,7 +51,7 @@ def evaluate_stage1(program_path):
5151 code = f.read()
5252
5353 if 'STAGE1_TIMEOUT' in code:
54- time.sleep(30 )
54+ time.sleep(8 )
5555 return {"stage1_score": 1.0}
5656 else:
5757 return {"stage1_score": 0.7}
@@ -61,7 +61,7 @@ def evaluate_stage2(program_path):
6161 code = f.read()
6262
6363 if 'STAGE2_TIMEOUT' in code:
64- time.sleep(30 )
64+ time.sleep(8 )
6565 return {"stage2_score": 1.0}
6666 else:
6767 return {"stage2_score": 0.8}
@@ -71,7 +71,7 @@ def evaluate_stage3(program_path):
7171 code = f.read()
7272
7373 if 'STAGE3_TIMEOUT' in code:
74- time.sleep(30 )
74+ time.sleep(8 )
7575 return {"stage3_score": 1.0}
7676 else:
7777 return {"stage3_score": 0.9}
@@ -84,8 +84,8 @@ def tearDown(self):
8484 if os .path .exists (self .test_eval_file .name ):
8585 os .unlink (self .test_eval_file .name )
8686
87- def _create_evaluator (self , timeout = 5 , cascade_evaluation = False ):
88- """Helper to create evaluator with given settings"""
87+ def _create_evaluator (self , timeout = 3 , cascade_evaluation = False ):
88+ """Helper to create evaluator with given settings (shorter timeout for faster tests)"""
8989 config = EvaluatorConfig ()
9090 config .timeout = timeout
9191 config .max_retries = 1 # Minimal retries for faster testing
@@ -103,7 +103,7 @@ def test_fast_evaluation_completes(self):
103103 """Test that fast evaluations complete successfully"""
104104
105105 async def run_test ():
106- evaluator = self ._create_evaluator (timeout = 5 )
106+ evaluator = self ._create_evaluator (timeout = 3 )
107107 program_code = "def test(): return 'fast'"
108108 start_time = time .time ()
109109
@@ -112,7 +112,7 @@ async def run_test():
112112 elapsed_time = time .time () - start_time
113113
114114 # Should complete quickly
115- self .assertLess (elapsed_time , 3 .0 )
115+ self .assertLess (elapsed_time , 2 .0 )
116116 # Should return successful result
117117 self .assertIn ("score" , result )
118118 self .assertEqual (result ["score" ], 0.5 )
@@ -126,7 +126,7 @@ def test_short_evaluation_completes(self):
126126 """Test that evaluations shorter than timeout complete successfully"""
127127
128128 async def run_test ():
129- evaluator = self ._create_evaluator (timeout = 5 )
129+ evaluator = self ._create_evaluator (timeout = 3 )
130130 program_code = "# SLEEP_SHORT\n def test(): return 'short'"
131131 start_time = time .time ()
132132
@@ -135,7 +135,7 @@ async def run_test():
135135 elapsed_time = time .time () - start_time
136136
137137 # Should complete within timeout
138- self .assertLess (elapsed_time , 5 )
138+ self .assertLess (elapsed_time , 3 )
139139 # Should return successful result
140140 self .assertIn ("score" , result )
141141 self .assertEqual (result ["score" ], 0.8 )
@@ -149,7 +149,7 @@ def test_long_evaluation_times_out(self):
149149 """Test that long evaluations time out properly"""
150150
151151 async def run_test ():
152- evaluator = self ._create_evaluator (timeout = 5 )
152+ evaluator = self ._create_evaluator (timeout = 3 )
153153 program_code = "# SLEEP_LONG\n def test(): return 'long'"
154154 start_time = time .time ()
155155
@@ -158,8 +158,8 @@ async def run_test():
158158 elapsed_time = time .time () - start_time
159159
160160 # Should complete around the timeout period (allowing some margin)
161- self .assertGreater (elapsed_time , 4 )
162- self .assertLess (elapsed_time , 8 )
161+ self .assertGreater (elapsed_time , 2.5 )
162+ self .assertLess (elapsed_time , 5 )
163163
164164 # Should return timeout result
165165 self .assertIn ("error" , result )
@@ -173,7 +173,7 @@ def test_cascade_evaluation_timeout_stage1(self):
173173 """Test timeout in cascade evaluation stage 1"""
174174
175175 async def run_test ():
176- evaluator = self ._create_evaluator (timeout = 5 , cascade_evaluation = True )
176+ evaluator = self ._create_evaluator (timeout = 3 , cascade_evaluation = True )
177177 program_code = "# STAGE1_TIMEOUT\n def test(): return 'stage1_timeout'"
178178 start_time = time .time ()
179179
@@ -182,8 +182,8 @@ async def run_test():
182182 elapsed_time = time .time () - start_time
183183
184184 # Should timeout around the configured timeout
185- self .assertGreater (elapsed_time , 4 )
186- self .assertLess (elapsed_time , 8 )
185+ self .assertGreater (elapsed_time , 2.5 )
186+ self .assertLess (elapsed_time , 5 )
187187
188188 # Should return stage1 timeout result
189189 self .assertIn ("stage1_passed" , result )
@@ -197,7 +197,7 @@ def test_cascade_evaluation_timeout_stage2(self):
197197 """Test timeout in cascade evaluation stage 2"""
198198
199199 async def run_test ():
200- evaluator = self ._create_evaluator (timeout = 5 , cascade_evaluation = True )
200+ evaluator = self ._create_evaluator (timeout = 3 , cascade_evaluation = True )
201201 program_code = "# STAGE2_TIMEOUT\n def test(): return 'stage2_timeout'"
202202 start_time = time .time ()
203203
@@ -206,8 +206,8 @@ async def run_test():
206206 elapsed_time = time .time () - start_time
207207
208208 # Should timeout on stage 2, but stage 1 should complete first
209- self .assertGreater (elapsed_time , 4 )
210- self .assertLess (elapsed_time , 8 )
209+ self .assertGreater (elapsed_time , 2.5 )
210+ self .assertLess (elapsed_time , 5 )
211211
212212 # Should have stage1 result but stage2 timeout
213213 self .assertIn ("stage1_score" , result )
@@ -223,7 +223,7 @@ def test_cascade_evaluation_timeout_stage3(self):
223223 """Test timeout in cascade evaluation stage 3"""
224224
225225 async def run_test ():
226- evaluator = self ._create_evaluator (timeout = 5 , cascade_evaluation = True )
226+ evaluator = self ._create_evaluator (timeout = 3 , cascade_evaluation = True )
227227 program_code = "# STAGE3_TIMEOUT\n def test(): return 'stage3_timeout'"
228228 start_time = time .time ()
229229
@@ -232,8 +232,8 @@ async def run_test():
232232 elapsed_time = time .time () - start_time
233233
234234 # Should timeout on stage 3, but stages 1 and 2 should complete first
235- self .assertGreater (elapsed_time , 4 )
236- self .assertLess (elapsed_time , 8 )
235+ self .assertGreater (elapsed_time , 2.5 )
236+ self .assertLess (elapsed_time , 5 )
237237
238238 # Should have stage1 and stage2 results but stage3 timeout
239239 self .assertIn ("stage1_score" , result )
@@ -252,7 +252,7 @@ def test_timeout_config_respected(self):
252252
253253 async def run_test ():
254254 # Create evaluator with different timeout
255- evaluator = self ._create_evaluator (timeout = 10 )
255+ evaluator = self ._create_evaluator (timeout = 5 )
256256
257257 program_code = "# SLEEP_LONG\n def test(): return 'long'"
258258 start_time = time .time ()
@@ -261,9 +261,9 @@ async def run_test():
261261
262262 elapsed_time = time .time () - start_time
263263
264- # Should timeout around 10 seconds, not 5
265- self .assertGreater (elapsed_time , 9 )
266- self .assertLess (elapsed_time , 13 )
264+ # Should timeout around 5 seconds, not 3
265+ self .assertGreater (elapsed_time , 4.5 )
266+ self .assertLess (elapsed_time , 7 )
267267
268268 # Should return timeout result
269269 self .assertIn ("timeout" , result )
@@ -277,7 +277,7 @@ def test_multiple_retries_with_errors(self):
277277 async def run_test ():
278278 # Create evaluator with more retries
279279 config = EvaluatorConfig ()
280- config .timeout = 10 # Long timeout to avoid timeout during this test
280+ config .timeout = 8 # Long timeout to avoid timeout during this test
281281 config .max_retries = 2 # 3 total attempts
282282 config .cascade_evaluation = False
283283
@@ -300,7 +300,7 @@ async def run_test():
300300 # Each attempt should fail quickly, plus 1 second sleep between retries
301301 # So total time should be around 2-3 seconds (quick failures + 2 sleep periods)
302302 self .assertGreater (elapsed_time , 1.8 ) # At least 2 sleep periods
303- self .assertLess (elapsed_time , 5 ) # But not too long
303+ self .assertLess (elapsed_time , 4 ) # But not too long
304304
305305 # Should return error result after all retries fail
306306 self .assertIn ("error" , result )
@@ -314,7 +314,7 @@ def test_timeout_does_not_trigger_retries(self):
314314 async def run_test ():
315315 # Create evaluator with retries enabled
316316 config = EvaluatorConfig ()
317- config .timeout = 3 # Short timeout
317+ config .timeout = 2 # Short timeout
318318 config .max_retries = 2 # Would allow 3 attempts if retries were triggered
319319 config .cascade_evaluation = False
320320
@@ -333,10 +333,10 @@ async def run_test():
333333
334334 elapsed_time = time .time () - start_time
335335
336- # Should timeout only once (~3 seconds), not retry multiple times
337- # If retries were happening, this would take ~9 seconds
338- self .assertGreater (elapsed_time , 2.5 ) # At least the timeout period
339- self .assertLess (elapsed_time , 5 ) # But not multiple timeout periods
336+ # Should timeout only once (~2 seconds), not retry multiple times
337+ # If retries were happening, this would take ~6 seconds
338+ self .assertGreater (elapsed_time , 1.8 ) # At least the timeout period
339+ self .assertLess (elapsed_time , 3. 5 ) # But not multiple timeout periods
340340
341341 # Should return timeout result
342342 self .assertIn ("timeout" , result )
@@ -350,7 +350,7 @@ def test_artifacts_on_timeout(self):
350350 async def run_test ():
351351 # Enable artifacts
352352 with patch .dict (os .environ , {"ENABLE_ARTIFACTS" : "true" }):
353- evaluator = self ._create_evaluator (timeout = 5 )
353+ evaluator = self ._create_evaluator (timeout = 3 )
354354 program_code = "# SLEEP_LONG\n def test(): return 'long'"
355355
356356 # Execute evaluation
@@ -382,7 +382,7 @@ async def run_test():
382382 "timeout_duration" , artifacts , "Artifacts should contain timeout_duration"
383383 )
384384 self .assertEqual (
385- artifacts ["timeout_duration" ], 5 , "timeout_duration should match config"
385+ artifacts ["timeout_duration" ], 3 , "timeout_duration should match config"
386386 )
387387
388388 print (f"✅ Artifacts captured correctly: { list (artifacts .keys ())} " )
@@ -405,8 +405,8 @@ async def run_test():
405405import time
406406
407407def evaluate(program_path):
408- # Simulate a very long evaluation (like the 11-hour case)
409- time.sleep(20 ) # 20 seconds to test timeout
408+ # Simulate a very long evaluation (like the 11-hour case)
409+ time.sleep(6 ) # 6 seconds to test timeout (reduced for faster tests)
410410 return {"accReturn": 0.1, "CalmarRatio": 0.9, "combined_score": 0.82}
411411"""
412412 )
@@ -415,7 +415,7 @@ def evaluate(program_path):
415415 try :
416416 # Configure like user's config but with shorter timeout for testing
417417 config = EvaluatorConfig ()
418- config .timeout = 5 # 5 seconds instead of 600
418+ config .timeout = 3 # 3 seconds instead of 600
419419 config .max_retries = 1
420420 config .cascade_evaluation = False
421421 config .parallel_evaluations = 1
@@ -438,9 +438,9 @@ def search_algorithm():
438438 result = await evaluator .evaluate_program (program_code , "financial_test" )
439439 elapsed_time = time .time () - start_time
440440
441- # Should timeout in ~5 seconds, not 20 + seconds
442- self .assertLess (elapsed_time , 8 )
443- self .assertGreater (elapsed_time , 4 )
441+ # Should timeout in ~3 seconds, not 6+ seconds
442+ self .assertLess (elapsed_time , 5 )
443+ self .assertGreater (elapsed_time , 2.5 )
444444
445445 # Should return timeout error
446446 self .assertIn ("error" , result )
0 commit comments