 from openevolve.evaluation_result import EvaluationResult
 
 
-class TestCascadeValidation(unittest.TestCase):
+class TestCascadeValidation(unittest.IsolatedAsyncioTestCase):
     """Tests for cascade evaluation configuration validation"""
 
     def setUp(self):
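The base-class change above is what makes the `async def test_*` methods later in this diff work: `unittest.IsolatedAsyncioTestCase` (Python 3.8+) runs each coroutine test in its own event loop, so the tests can `await evaluator._direct_evaluate(...)` directly. A minimal standalone sketch:

import asyncio
import unittest


class AsyncExample(unittest.IsolatedAsyncioTestCase):
    async def test_coroutine_is_awaited(self):
        # The runner builds a fresh event loop and awaits this method for us.
        await asyncio.sleep(0)
        self.assertEqual(1 + 1, 2)


if __name__ == "__main__":
    unittest.main()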
@@ -23,10 +23,9 @@ def setUp(self):
 
     def tearDown(self):
         """Clean up temporary files"""
-        # Clean up temp files
-        for file in os.listdir(self.temp_dir):
-            os.remove(os.path.join(self.temp_dir, file))
-        os.rmdir(self.temp_dir)
+        # Clean up temp files more safely
+        import shutil
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def _create_evaluator_file(self, filename: str, content: str) -> str:
         """Helper to create temporary evaluator file"""
@@ -59,7 +58,7 @@ def evaluate(program_path):
 
         # Should not raise warnings for valid cascade evaluator
         with patch('openevolve.evaluator.logger') as mock_logger:
-            evaluator = Evaluator(self.config.evaluator, None)
+            evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
             # Should not have called warning
             mock_logger.warning.assert_not_called()
@@ -79,7 +78,7 @@ def evaluate(program_path):
 
         # Should warn about missing cascade functions
         with patch('openevolve.evaluator.logger') as mock_logger:
-            evaluator = Evaluator(self.config.evaluator, None)
+            evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
             # Should have warned about missing stage functions
             mock_logger.warning.assert_called()
@@ -103,12 +102,14 @@ def evaluate(program_path):
         self.config.evaluator.cascade_evaluation = True
         self.config.evaluator.evaluation_file = evaluator_path
 
-        # Should not warn since stage1 exists (minimum requirement)
+        # Should warn about missing additional stages
         with patch('openevolve.evaluator.logger') as mock_logger:
-            evaluator = Evaluator(self.config.evaluator, None)
+            evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
-            # Should not warn since stage1 exists
-            mock_logger.warning.assert_not_called()
+            # Should warn about missing stage2/stage3
+            mock_logger.warning.assert_called_once()
+            warning_call = mock_logger.warning.call_args[0][0]
+            self.assertIn("defines 'evaluate_stage1' but no additional cascade stages", warning_call)
 
     def test_no_cascade_validation_when_disabled(self):
         """Test no validation when cascade evaluation is disabled"""
@@ -125,12 +126,12 @@ def evaluate(program_path):
 
         # Should not perform validation or warn
         with patch('openevolve.evaluator.logger') as mock_logger:
-            evaluator = Evaluator(self.config.evaluator, None)
+            evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
             # Should not warn when cascade evaluation is disabled
             mock_logger.warning.assert_not_called()
 
-    def test_direct_evaluate_supports_evaluation_result(self):
+    async def test_direct_evaluate_supports_evaluation_result(self):
         """Test that _direct_evaluate supports EvaluationResult returns"""
         # Create evaluator that returns EvaluationResult
         evaluator_content = '''
@@ -148,27 +149,29 @@ def evaluate(program_path):
         self.config.evaluator.evaluation_file = evaluator_path
         self.config.evaluator.timeout = 10
 
-        evaluator = Evaluator(self.config.evaluator, None)
+        evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
         # Create a dummy program file
         program_path = self._create_evaluator_file("test_program.py", "def test(): pass")
 
-        # Mock the evaluation process
-        with patch('openevolve.evaluator.run_external_evaluator') as mock_run:
-            mock_run.return_value = EvaluationResult(
+        # Mock the evaluation function
+        def mock_evaluate(path):
+            return EvaluationResult(
                 metrics={"score": 0.8, "accuracy": 0.9},
                 artifacts={"debug_info": "test data"}
             )
-
-            # Should handle EvaluationResult without issues
-            result = evaluator._direct_evaluate(program_path)
-
-            # Should return the EvaluationResult as-is
-            self.assertIsInstance(result, EvaluationResult)
-            self.assertEqual(result.metrics["score"], 0.8)
-            self.assertEqual(result.artifacts["debug_info"], "test data")
+
+        evaluator.evaluate_function = mock_evaluate
+
+        # Should handle EvaluationResult without issues
+        result = await evaluator._direct_evaluate(program_path)
+
+        # Should return the EvaluationResult as-is
+        self.assertIsInstance(result, EvaluationResult)
+        self.assertEqual(result.metrics["score"], 0.8)
+        self.assertEqual(result.artifacts["debug_info"], "test data")
 
-    def test_direct_evaluate_supports_dict_result(self):
+    async def test_direct_evaluate_supports_dict_result(self):
         """Test that _direct_evaluate still supports dict returns"""
         # Create evaluator that returns dict
         evaluator_content = '''
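Note how the EvaluationResult test above now injects the fake by assigning to `evaluator.evaluate_function` instead of patching a module-level helper, so the test no longer depends on how the evaluator module imports its target. A hedged sketch of why that works, using a stand-in class (the real `Evaluator` internals may differ):

import asyncio


class FakeEvaluator:
    """Stand-in for the real Evaluator; assumes _direct_evaluate
    ultimately calls self.evaluate_function on the program path."""

    def __init__(self, evaluate_function):
        self.evaluate_function = evaluate_function

    async def _direct_evaluate(self, program_path):
        return self.evaluate_function(program_path)


async def demo():
    ev = FakeEvaluator(lambda path: {"score": 0.7})
    assert await ev._direct_evaluate("program.py") == {"score": 0.7}

asyncio.run(demo())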
@@ -181,31 +184,36 @@ def evaluate(program_path):
         self.config.evaluator.evaluation_file = evaluator_path
         self.config.evaluator.timeout = 10
 
-        evaluator = Evaluator(self.config.evaluator, None)
+        evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
         # Create a dummy program file
         program_path = self._create_evaluator_file("test_program.py", "def test(): pass")
 
-        # Mock the evaluation process
-        with patch('openevolve.evaluator.run_external_evaluator') as mock_run:
-            mock_run.return_value = {"score": 0.7, "performance": 0.85}
-
-            # Should handle dict result without issues
-            result = evaluator._direct_evaluate(program_path)
-
-            # Should return the dict as-is
-            self.assertIsInstance(result, dict)
-            self.assertEqual(result["score"], 0.7)
-            self.assertEqual(result["performance"], 0.85)
+        # Mock the evaluation function directly
+        def mock_evaluate(path):
+            return {"score": 0.7, "performance": 0.85}
+
+        evaluator.evaluate_function = mock_evaluate
+
+        # Should handle dict result without issues
+        result = await evaluator._direct_evaluate(program_path)
+
+        # Should return the dict as-is
+        self.assertIsInstance(result, dict)
+        self.assertEqual(result["score"], 0.7)
+        self.assertEqual(result["performance"], 0.85)
 
     def test_cascade_validation_with_class_based_evaluator(self):
         """Test cascade validation with class-based evaluator"""
-        # Create class-based evaluator
+        # Create class-based evaluator with all stages
         evaluator_content = '''
 class Evaluator:
     def evaluate_stage1(self, program_path):
         return {"stage1_score": 0.5}
 
+    def evaluate_stage2(self, program_path):
+        return {"stage2_score": 0.7}
+
     def evaluate(self, program_path):
         return {"score": 0.5}
 
@@ -214,6 +222,10 @@ def evaluate_stage1(program_path):
     evaluator = Evaluator()
     return evaluator.evaluate_stage1(program_path)
 
+def evaluate_stage2(program_path):
+    evaluator = Evaluator()
+    return evaluator.evaluate_stage2(program_path)
+
 def evaluate(program_path):
     evaluator = Evaluator()
     return evaluator.evaluate(program_path)
@@ -226,7 +238,7 @@ def evaluate(program_path):
 
         # Should not warn since module-level functions exist
         with patch('openevolve.evaluator.logger') as mock_logger:
-            evaluator = Evaluator(self.config.evaluator, None)
+            evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
             mock_logger.warning.assert_not_called()
 
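The class-based test above keeps module-level `evaluate_stage1`/`evaluate_stage2` wrappers because, as its assertions imply, cascade validation inspects the imported module's top-level names rather than methods on a class. A sketch of that kind of lookup, with a hypothetical helper name:

import importlib.util


def has_extra_cascade_stages(path: str) -> bool:
    # Hypothetical helper: load an evaluator module from a file path and
    # check whether additional stage functions exist at module level.
    spec = importlib.util.spec_from_file_location("evaluator_module", path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return any(
        callable(getattr(module, name, None))
        for name in ("evaluate_stage2", "evaluate_stage3")
    )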
@@ -243,58 +255,65 @@ def evaluate_stage1(program_path) # Missing colon
         self.config.evaluator.cascade_evaluation = True
         self.config.evaluator.evaluation_file = evaluator_path
 
-        # Should handle syntax error and still warn about cascade
-        with patch('openevolve.evaluator.logger') as mock_logger:
-            evaluator = Evaluator(self.config.evaluator, None)
-
-            # Should have warned about missing functions (due to import failure)
-            mock_logger.warning.assert_called()
+        # Should raise an error due to syntax error
+        with self.assertRaises(Exception):  # Could be SyntaxError or other import error
+            evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
     def test_cascade_validation_nonexistent_file(self):
         """Test cascade validation with nonexistent evaluator file"""
         # Configure with nonexistent file
+        nonexistent_path = "/nonexistent/path.py"
         self.config.evaluator.cascade_evaluation = True
-        self.config.evaluator.evaluation_file = "/nonexistent/path.py"
+        self.config.evaluator.evaluation_file = nonexistent_path
 
-        # Should handle missing file gracefully
-        with patch('openevolve.evaluator.logger') as mock_logger:
-            evaluator = Evaluator(self.config.evaluator, None)
-
-            # Should have warned about missing functions (due to import failure)
-            mock_logger.warning.assert_called()
+        # Should raise ValueError for missing file
+        with self.assertRaises(ValueError) as context:
+            evaluator = Evaluator(self.config.evaluator, nonexistent_path)
+
+        self.assertIn("not found", str(context.exception))
 
     def test_process_evaluation_result_with_artifacts(self):
         """Test that _process_evaluation_result handles artifacts correctly"""
-        evaluator_path = self._create_evaluator_file("dummy.py", "def evaluate(p): pass")
+        evaluator_content = '''
+def evaluate(program_path):
+    return {"score": 0.5}
+'''
+        evaluator_path = self._create_evaluator_file("dummy.py", evaluator_content)
 
+        self.config.evaluator.cascade_evaluation = False  # Disable cascade to avoid warnings
         self.config.evaluator.evaluation_file = evaluator_path
-        evaluator = Evaluator(self.config.evaluator, None)
+        evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
         # Test with EvaluationResult containing artifacts
         eval_result = EvaluationResult(
             metrics={"score": 0.9},
             artifacts={"log": "test log", "data": [1, 2, 3]}
         )
 
-        metrics, artifacts = evaluator._process_evaluation_result(eval_result)
+        result = evaluator._process_evaluation_result(eval_result)
 
-        self.assertEqual(metrics, {"score": 0.9})
-        self.assertEqual(artifacts, {"log": "test log", "data": [1, 2, 3]})
+        self.assertEqual(result.metrics, {"score": 0.9})
+        self.assertEqual(result.artifacts, {"log": "test log", "data": [1, 2, 3]})
 
     def test_process_evaluation_result_with_dict(self):
         """Test that _process_evaluation_result handles dict results correctly"""
-        evaluator_path = self._create_evaluator_file("dummy.py", "def evaluate(p): pass")
+        evaluator_content = '''
+def evaluate(program_path):
+    return {"score": 0.5}
+'''
+        evaluator_path = self._create_evaluator_file("dummy.py", evaluator_content)
 
+        self.config.evaluator.cascade_evaluation = False  # Disable cascade to avoid warnings
         self.config.evaluator.evaluation_file = evaluator_path
-        evaluator = Evaluator(self.config.evaluator, None)
+        evaluator = Evaluator(self.config.evaluator, evaluator_path)
 
         # Test with dict result
         dict_result = {"score": 0.7, "accuracy": 0.8}
 
-        metrics, artifacts = evaluator._process_evaluation_result(dict_result)
+        result = evaluator._process_evaluation_result(dict_result)
 
-        self.assertEqual(metrics, {"score": 0.7, "accuracy": 0.8})
-        self.assertEqual(artifacts, {})
+        self.assertEqual(result.metrics, {"score": 0.7, "accuracy": 0.8})
+        self.assertEqual(result.artifacts, {})
 
 
 if __name__ == "__main__":
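The `assertRaises(...) as context` form used in the nonexistent-file test is what exposes the raised exception object for message checks; in isolation:

import unittest


class RaisesExample(unittest.TestCase):
    def test_exception_message_is_inspectable(self):
        with self.assertRaises(ValueError) as context:
            raise ValueError("evaluation file not found")
        # context.exception holds the caught exception instance.
        self.assertIn("not found", str(context.exception))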