algorithmicsuperintelligence
diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py
Lines changed: 12 additions & 2 deletions
diff --git a/tests/test_checkpoint_resume.py b/tests/test_checkpoint_resume.py
Lines changed: 3 additions & 1 deletion
diff --git a/tests/test_feature_stats_persistence.py b/tests/test_feature_stats_persistence.py
Lines changed: 59 additions & 78 deletions
@@ -8,7 +8,7 @@
 import pickle
 import signal
 import time
-from concurrent.futures import ProcessPoolExecutor, Future
+from concurrent.futures import ProcessPoolExecutor, Future, TimeoutError as FutureTimeoutError
 from dataclasses import dataclass, asdict
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
@@ -454,7 +454,9 @@ async def run_evolution(
             future = pending_futures.pop(completed_iteration)
 
             try:
-                result = future.result()
+                # Use evaluator timeout + buffer to gracefully handle stuck processes
+                timeout_seconds = self.config.evaluator.timeout + 30
+                result = future.result(timeout=timeout_seconds)
 
                 if result.error:
                     logger.warning(f"Iteration {completed_iteration} error: {result.error}")
@@ -612,6 +614,14 @@ async def run_evolution(
                                 )
                                 break
 
+            except FutureTimeoutError:
+                logger.error(
+                    f"⏰ Iteration {completed_iteration} timed out after {timeout_seconds}s "
+                    f"(evaluator timeout: {self.config.evaluator.timeout}s + 30s buffer). "
+                    f"Canceling future and continuing with next iteration."
+                )
+                # Best-effort only: cancel() stops a call that has not started yet; it cannot stop a running call or terminate its worker process
+                future.cancel()
             except Exception as e:
                 logger.error(f"Error processing result from iteration {completed_iteration}: {e}")
 
 
@@ -308,7 +308,9 @@ async def run_test():
                 )
 
                 # Mock the parallel controller to avoid API calls
-                with patch("openevolve.controller.ProcessParallelController") as mock_parallel_class:
+                with patch(
+                    "openevolve.controller.ProcessParallelController"
+                ) as mock_parallel_class:
                     mock_parallel = MagicMock()
                     mock_parallel.run_evolution = AsyncMock(return_value=None)
                     mock_parallel.start = MagicMock()
 
@@ -22,7 +22,7 @@ def setUp(self):
         self.config = DatabaseConfig(
             db_path=self.test_dir,
             feature_dimensions=["score", "custom_metric1", "custom_metric2"],
-            feature_bins=10
+            feature_bins=10,
         )
 
     def tearDown(self):
@@ -33,7 +33,7 @@ def test_feature_stats_saved_and_loaded(self):
         """Test that feature_stats are correctly saved and loaded from checkpoints"""
         # Create database and add programs to build feature_stats
         db1 = ProgramDatabase(self.config)
-        
+
         programs = []
         for i in range(5):
             program = Program(
@@ -42,8 +42,8 @@ def test_feature_stats_saved_and_loaded(self):
                 metrics={
                     "combined_score": 0.1 + i * 0.2,
                     "custom_metric1": 10 + i * 20,
-                    "custom_metric2": 100 + i * 50
-                }
+                    "custom_metric2": 100 + i * 50,
+                },
             )
             programs.append(program)
             db1.add(program)
@@ -52,14 +52,10 @@ def test_feature_stats_saved_and_loaded(self):
         self.assertIn("score", db1.feature_stats)
         self.assertIn("custom_metric1", db1.feature_stats)
         self.assertIn("custom_metric2", db1.feature_stats)
-        
+
         # Store original feature_stats for comparison
         original_stats = {
-            dim: {
-                "min": stats["min"],
-                "max": stats["max"],
-                "values": stats["values"].copy()
-            }
+            dim: {"min": stats["min"], "max": stats["max"], "values": stats["values"].copy()}
             for dim, stats in db1.feature_stats.items()
         }
 
@@ -72,33 +68,33 @@ def test_feature_stats_saved_and_loaded(self):
 
         # Verify feature_stats were loaded correctly
         self.assertEqual(len(db2.feature_stats), len(original_stats))
-        
+
         for dim, original in original_stats.items():
             self.assertIn(dim, db2.feature_stats)
             loaded = db2.feature_stats[dim]
-            
+
             self.assertAlmostEqual(loaded["min"], original["min"], places=5)
             self.assertAlmostEqual(loaded["max"], original["max"], places=5)
             self.assertEqual(loaded["values"], original["values"])
 
     def test_empty_feature_stats_handling(self):
         """Test handling of empty feature_stats"""
         db1 = ProgramDatabase(self.config)
-        
+
         # Save without any programs (empty feature_stats)
         db1.save(self.test_dir, iteration=1)
-        
+
         # Load and verify
         db2 = ProgramDatabase(self.config)
         db2.load(self.test_dir)
-        
+
         self.assertEqual(db2.feature_stats, {})
 
     def test_backward_compatibility_missing_feature_stats(self):
         """Test loading checkpoints that don't have feature_stats (backward compatibility)"""
         # Create a checkpoint manually without feature_stats
         os.makedirs(self.test_dir, exist_ok=True)
-        
+
         # Create metadata without feature_stats (simulating old checkpoint)
         metadata = {
             "feature_map": {},
@@ -112,60 +108,48 @@ def test_backward_compatibility_missing_feature_stats(self):
             "last_migration_generation": 0,
             # Note: no "feature_stats" key
         }
-        
+
         with open(os.path.join(self.test_dir, "metadata.json"), "w") as f:
             json.dump(metadata, f)
-        
+
         # Load should work without errors
         db = ProgramDatabase(self.config)
         db.load(self.test_dir)
-        
+
         # feature_stats should be empty but not None
         self.assertEqual(db.feature_stats, {})
 
     def test_feature_stats_serialization_edge_cases(self):
         """Test feature_stats serialization handles edge cases correctly"""
         db = ProgramDatabase(self.config)
-        
+
         # Test with various edge cases
         db.feature_stats = {
-            "normal_case": {
-                "min": 1.0,
-                "max": 10.0,
-                "values": [1.0, 5.0, 10.0]
-            },
-            "single_value": {
-                "min": 5.0,
-                "max": 5.0,
-                "values": [5.0]
-            },
+            "normal_case": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]},
+            "single_value": {"min": 5.0, "max": 5.0, "values": [5.0]},
             "large_values_list": {
                 "min": 0.0,
                 "max": 200.0,
-                "values": list(range(200))  # Should be truncated to 100
+                "values": list(range(200)),  # Should be truncated to 100
             },
-            "empty_values": {
-                "min": 0.0,
-                "max": 1.0,
-                "values": []
-            }
+            "empty_values": {"min": 0.0, "max": 1.0, "values": []},
         }
-        
+
         # Test serialization
         serialized = db._serialize_feature_stats()
-        
+
         # Check that large values list was truncated
         self.assertLessEqual(len(serialized["large_values_list"]["values"]), 100)
-        
+
         # Test deserialization
         deserialized = db._deserialize_feature_stats(serialized)
-        
+
         # Verify structure is maintained
         self.assertIn("normal_case", deserialized)
         self.assertIn("single_value", deserialized)
         self.assertIn("large_values_list", deserialized)
         self.assertIn("empty_values", deserialized)
-        
+
         # Verify types are correct
         for dim, stats in deserialized.items():
             self.assertIsInstance(stats["min"], float)
@@ -176,29 +160,26 @@ def test_feature_stats_preservation_during_load(self):
         """Test that feature_stats ranges are preserved when loading from checkpoint"""
         # Create database with programs
         db1 = ProgramDatabase(self.config)
-        
+
         test_programs = []
-        
+
         for i in range(3):
             program = Program(
                 id=f"stats_test_{i}",
                 code=f"# Stats test {i}",
                 metrics={
                     "combined_score": 0.2 + i * 0.3,
                     "custom_metric1": 20 + i * 30,
-                    "custom_metric2": 200 + i * 100
-                }
+                    "custom_metric2": 200 + i * 100,
+                },
             )
             test_programs.append(program)
             db1.add(program)
 
         # Record original feature ranges
         original_ranges = {}
         for dim, stats in db1.feature_stats.items():
-            original_ranges[dim] = {
-                "min": stats["min"],
-                "max": stats["max"]
-            }
+            original_ranges[dim] = {"min": stats["min"], "max": stats["max"]}
 
         # Save checkpoint
         db1.save(self.test_dir, iteration=50)
@@ -211,31 +192,35 @@ def test_feature_stats_preservation_during_load(self):
         for dim, original_range in original_ranges.items():
             self.assertIn(dim, db2.feature_stats)
             loaded_stats = db2.feature_stats[dim]
-            
+
             self.assertAlmostEqual(
-                loaded_stats["min"], original_range["min"], places=5,
-                msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}"
+                loaded_stats["min"],
+                original_range["min"],
+                places=5,
+                msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}",
             )
             self.assertAlmostEqual(
-                loaded_stats["max"], original_range["max"], places=5,
-                msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}"
+                loaded_stats["max"],
+                original_range["max"],
+                places=5,
+                msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}",
             )
-        
+
         # Test that adding a new program within existing ranges doesn't break anything
         new_program = Program(
             id="range_test",
             code="# Program to test range stability",
             metrics={
                 "combined_score": 0.35,  # Within existing range
-                "custom_metric1": 35,    # Within existing range
-                "custom_metric2": 250    # Within existing range
-            }
+                "custom_metric1": 35,  # Within existing range
+                "custom_metric2": 250,  # Within existing range
+            },
         )
-        
+
         # Adding this program should not cause issues
         db2.add(new_program)
         new_coords = db2._calculate_feature_coords(new_program)
-        
+
         # Should get valid coordinates
         self.assertEqual(len(new_coords), len(self.config.feature_dimensions))
         for coord in new_coords:
@@ -245,25 +230,25 @@ def test_feature_stats_preservation_during_load(self):
     def test_feature_stats_with_numpy_types(self):
         """Test that numpy types are correctly handled in serialization"""
         import numpy as np
-        
+
         db = ProgramDatabase(self.config)
-        
+
         # Simulate feature_stats with numpy types
         db.feature_stats = {
             "numpy_test": {
                 "min": np.float64(1.5),
                 "max": np.float64(9.5),
-                "values": [np.float64(x) for x in [1.5, 5.0, 9.5]]
+                "values": [np.float64(x) for x in [1.5, 5.0, 9.5]],
             }
         }
-        
+
         # Test serialization doesn't fail
         serialized = db._serialize_feature_stats()
-        
+
         # Verify numpy types were converted to Python types
         self.assertIsInstance(serialized["numpy_test"]["min"], float)
         self.assertIsInstance(serialized["numpy_test"]["max"], float)
-        
+
         # Test deserialization
         deserialized = db._deserialize_feature_stats(serialized)
         self.assertIsInstance(deserialized["numpy_test"]["min"], float)
@@ -272,32 +257,28 @@ def test_feature_stats_with_numpy_types(self):
     def test_malformed_feature_stats_handling(self):
         """Test handling of malformed feature_stats during deserialization"""
         db = ProgramDatabase(self.config)
-        
+
         # Test with malformed data
         malformed_data = {
-            "valid_entry": {
-                "min": 1.0,
-                "max": 10.0,
-                "values": [1.0, 5.0, 10.0]
-            },
+            "valid_entry": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]},
             "invalid_entry": "this is not a dict",
             "missing_keys": {
                 "min": 1.0
                 # missing "max" and "values"
-            }
+            },
         }
-        
-        with patch('openevolve.database.logger') as mock_logger:
+
+        with patch("openevolve.database.logger") as mock_logger:
             deserialized = db._deserialize_feature_stats(malformed_data)
-        
+
         # Should have valid entry and skip invalid ones
         self.assertIn("valid_entry", deserialized)
         self.assertNotIn("invalid_entry", deserialized)
         self.assertIn("missing_keys", deserialized)  # Should be created with defaults
-        
+
         # Should have logged warning for invalid entry
         mock_logger.warning.assert_called()
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()