2 changes: 1 addition & 1 deletion openevolve/_version.py
@@ -1,3 +1,3 @@
"""Version information for openevolve package."""

__version__ = "0.2.10"
__version__ = "0.2.11"
14 changes: 12 additions & 2 deletions openevolve/process_parallel.py
@@ -8,7 +8,7 @@
import pickle
import signal
import time
from concurrent.futures import ProcessPoolExecutor, Future
from concurrent.futures import ProcessPoolExecutor, Future, TimeoutError as FutureTimeoutError
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@@ -454,7 +454,9 @@ async def run_evolution(
future = pending_futures.pop(completed_iteration)

try:
result = future.result()
# Use evaluator timeout + buffer to gracefully handle stuck processes
timeout_seconds = self.config.evaluator.timeout + 30
result = future.result(timeout=timeout_seconds)

if result.error:
logger.warning(f"Iteration {completed_iteration} error: {result.error}")
@@ -612,6 +614,14 @@ async def run_evolution(
)
break

except FutureTimeoutError:
logger.error(
f"⏰ Iteration {completed_iteration} timed out after {timeout_seconds}s "
f"(evaluator timeout: {self.config.evaluator.timeout}s + 30s buffer). "
f"Canceling future and continuing with next iteration."
)
# Cancel the future to clean up the process
future.cancel()
except Exception as e:
logger.error(f"Error processing result from iteration {completed_iteration}: {e}")

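The change above bounds how long the main loop waits on a worker result: future.result() now takes the evaluator timeout plus a 30-second buffer, and FutureTimeoutError is caught so a stuck process no longer blocks evolution indefinitely. Below is a minimal, self-contained sketch of the same concurrent.futures pattern, assuming a plain ProcessPoolExecutor outside of openevolve; slow_task and EVALUATOR_TIMEOUT are illustrative stand-ins, not names from this codebase.

import logging
import time
from concurrent.futures import ProcessPoolExecutor, TimeoutError as FutureTimeoutError

logger = logging.getLogger(__name__)

EVALUATOR_TIMEOUT = 5  # stand-in for config.evaluator.timeout


def slow_task(seconds: float) -> str:
    # Simulates an evaluation that hangs longer than the allowed budget
    time.sleep(seconds)
    return "done"


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    with ProcessPoolExecutor(max_workers=1) as executor:
        future = executor.submit(slow_task, 60)
        try:
            # Evaluator timeout plus a buffer, mirroring the diff above
            result = future.result(timeout=EVALUATOR_TIMEOUT + 30)
            logger.info("Result: %s", result)
        except FutureTimeoutError:
            logger.error("Task timed out; canceling and moving on")
            # cancel() only prevents a call that has not started yet; a call
            # that is already executing keeps its worker occupied until it returns
            future.cancel()

In this sketch the timeout fires after roughly 35 seconds, and the executor's shutdown at the end of the with block still waits for the already-running call to finish, since cancel() cannot interrupt a call in progress.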
4 changes: 3 additions & 1 deletion tests/test_checkpoint_resume.py
@@ -308,7 +308,9 @@ async def run_test():
)

# Mock the parallel controller to avoid API calls
with patch("openevolve.controller.ProcessParallelController") as mock_parallel_class:
with patch(
"openevolve.controller.ProcessParallelController"
) as mock_parallel_class:
mock_parallel = MagicMock()
mock_parallel.run_evolution = AsyncMock(return_value=None)
mock_parallel.start = MagicMock()
137 changes: 59 additions & 78 deletions tests/test_feature_stats_persistence.py
@@ -22,7 +22,7 @@ def setUp(self):
self.config = DatabaseConfig(
db_path=self.test_dir,
feature_dimensions=["score", "custom_metric1", "custom_metric2"],
feature_bins=10
feature_bins=10,
)

def tearDown(self):
@@ -33,7 +33,7 @@ def test_feature_stats_saved_and_loaded(self):
"""Test that feature_stats are correctly saved and loaded from checkpoints"""
# Create database and add programs to build feature_stats
db1 = ProgramDatabase(self.config)

programs = []
for i in range(5):
program = Program(
@@ -42,8 +42,8 @@ def test_feature_stats_saved_and_loaded(self):
metrics={
"combined_score": 0.1 + i * 0.2,
"custom_metric1": 10 + i * 20,
"custom_metric2": 100 + i * 50
}
"custom_metric2": 100 + i * 50,
},
)
programs.append(program)
db1.add(program)
@@ -52,14 +52,10 @@ def test_feature_stats_saved_and_loaded(self):
self.assertIn("score", db1.feature_stats)
self.assertIn("custom_metric1", db1.feature_stats)
self.assertIn("custom_metric2", db1.feature_stats)

# Store original feature_stats for comparison
original_stats = {
dim: {
"min": stats["min"],
"max": stats["max"],
"values": stats["values"].copy()
}
dim: {"min": stats["min"], "max": stats["max"], "values": stats["values"].copy()}
for dim, stats in db1.feature_stats.items()
}

@@ -72,33 +68,33 @@ def test_feature_stats_saved_and_loaded(self):

# Verify feature_stats were loaded correctly
self.assertEqual(len(db2.feature_stats), len(original_stats))

for dim, original in original_stats.items():
self.assertIn(dim, db2.feature_stats)
loaded = db2.feature_stats[dim]

self.assertAlmostEqual(loaded["min"], original["min"], places=5)
self.assertAlmostEqual(loaded["max"], original["max"], places=5)
self.assertEqual(loaded["values"], original["values"])

def test_empty_feature_stats_handling(self):
"""Test handling of empty feature_stats"""
db1 = ProgramDatabase(self.config)

# Save without any programs (empty feature_stats)
db1.save(self.test_dir, iteration=1)

# Load and verify
db2 = ProgramDatabase(self.config)
db2.load(self.test_dir)

self.assertEqual(db2.feature_stats, {})

def test_backward_compatibility_missing_feature_stats(self):
"""Test loading checkpoints that don't have feature_stats (backward compatibility)"""
# Create a checkpoint manually without feature_stats
os.makedirs(self.test_dir, exist_ok=True)

# Create metadata without feature_stats (simulating old checkpoint)
metadata = {
"feature_map": {},
@@ -112,60 +108,48 @@ def test_backward_compatibility_missing_feature_stats(self):
"last_migration_generation": 0,
# Note: no "feature_stats" key
}

with open(os.path.join(self.test_dir, "metadata.json"), "w") as f:
json.dump(metadata, f)

# Load should work without errors
db = ProgramDatabase(self.config)
db.load(self.test_dir)

# feature_stats should be empty but not None
self.assertEqual(db.feature_stats, {})

def test_feature_stats_serialization_edge_cases(self):
"""Test feature_stats serialization handles edge cases correctly"""
db = ProgramDatabase(self.config)

# Test with various edge cases
db.feature_stats = {
"normal_case": {
"min": 1.0,
"max": 10.0,
"values": [1.0, 5.0, 10.0]
},
"single_value": {
"min": 5.0,
"max": 5.0,
"values": [5.0]
},
"normal_case": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]},
"single_value": {"min": 5.0, "max": 5.0, "values": [5.0]},
"large_values_list": {
"min": 0.0,
"max": 200.0,
"values": list(range(200)) # Should be truncated to 100
"values": list(range(200)), # Should be truncated to 100
},
"empty_values": {
"min": 0.0,
"max": 1.0,
"values": []
}
"empty_values": {"min": 0.0, "max": 1.0, "values": []},
}

# Test serialization
serialized = db._serialize_feature_stats()

# Check that large values list was truncated
self.assertLessEqual(len(serialized["large_values_list"]["values"]), 100)

# Test deserialization
deserialized = db._deserialize_feature_stats(serialized)

# Verify structure is maintained
self.assertIn("normal_case", deserialized)
self.assertIn("single_value", deserialized)
self.assertIn("large_values_list", deserialized)
self.assertIn("empty_values", deserialized)

# Verify types are correct
for dim, stats in deserialized.items():
self.assertIsInstance(stats["min"], float)
@@ -176,29 +160,26 @@ def test_feature_stats_preservation_during_load(self):
"""Test that feature_stats ranges are preserved when loading from checkpoint"""
# Create database with programs
db1 = ProgramDatabase(self.config)

test_programs = []

for i in range(3):
program = Program(
id=f"stats_test_{i}",
code=f"# Stats test {i}",
metrics={
"combined_score": 0.2 + i * 0.3,
"custom_metric1": 20 + i * 30,
"custom_metric2": 200 + i * 100
}
"custom_metric2": 200 + i * 100,
},
)
test_programs.append(program)
db1.add(program)

# Record original feature ranges
original_ranges = {}
for dim, stats in db1.feature_stats.items():
original_ranges[dim] = {
"min": stats["min"],
"max": stats["max"]
}
original_ranges[dim] = {"min": stats["min"], "max": stats["max"]}

# Save checkpoint
db1.save(self.test_dir, iteration=50)
@@ -211,31 +192,35 @@ def test_feature_stats_preservation_during_load(self):
for dim, original_range in original_ranges.items():
self.assertIn(dim, db2.feature_stats)
loaded_stats = db2.feature_stats[dim]

self.assertAlmostEqual(
loaded_stats["min"], original_range["min"], places=5,
msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}"
loaded_stats["min"],
original_range["min"],
places=5,
msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}",
)
self.assertAlmostEqual(
loaded_stats["max"], original_range["max"], places=5,
msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}"
loaded_stats["max"],
original_range["max"],
places=5,
msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}",
)

# Test that adding a new program within existing ranges doesn't break anything
new_program = Program(
id="range_test",
code="# Program to test range stability",
metrics={
"combined_score": 0.35, # Within existing range
"custom_metric1": 35, # Within existing range
"custom_metric2": 250 # Within existing range
}
"custom_metric1": 35, # Within existing range
"custom_metric2": 250, # Within existing range
},
)

# Adding this program should not cause issues
db2.add(new_program)
new_coords = db2._calculate_feature_coords(new_program)

# Should get valid coordinates
self.assertEqual(len(new_coords), len(self.config.feature_dimensions))
for coord in new_coords:
@@ -245,25 +230,25 @@ def test_feature_stats_preservation_during_load(self):
def test_feature_stats_with_numpy_types(self):
"""Test that numpy types are correctly handled in serialization"""
import numpy as np

db = ProgramDatabase(self.config)

# Simulate feature_stats with numpy types
db.feature_stats = {
"numpy_test": {
"min": np.float64(1.5),
"max": np.float64(9.5),
"values": [np.float64(x) for x in [1.5, 5.0, 9.5]]
"values": [np.float64(x) for x in [1.5, 5.0, 9.5]],
}
}

# Test serialization doesn't fail
serialized = db._serialize_feature_stats()

# Verify numpy types were converted to Python types
self.assertIsInstance(serialized["numpy_test"]["min"], float)
self.assertIsInstance(serialized["numpy_test"]["max"], float)

# Test deserialization
deserialized = db._deserialize_feature_stats(serialized)
self.assertIsInstance(deserialized["numpy_test"]["min"], float)
@@ -272,32 +257,28 @@ def test_feature_stats_with_numpy_types(self):
def test_malformed_feature_stats_handling(self):
"""Test handling of malformed feature_stats during deserialization"""
db = ProgramDatabase(self.config)

# Test with malformed data
malformed_data = {
"valid_entry": {
"min": 1.0,
"max": 10.0,
"values": [1.0, 5.0, 10.0]
},
"valid_entry": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]},
"invalid_entry": "this is not a dict",
"missing_keys": {
"min": 1.0
# missing "max" and "values"
}
},
}
with patch('openevolve.database.logger') as mock_logger:

with patch("openevolve.database.logger") as mock_logger:
deserialized = db._deserialize_feature_stats(malformed_data)

# Should have valid entry and skip invalid ones
self.assertIn("valid_entry", deserialized)
self.assertNotIn("invalid_entry", deserialized)
self.assertIn("missing_keys", deserialized) # Should be created with defaults

# Should have logged warning for invalid entry
mock_logger.warning.assert_called()


if __name__ == "__main__":
unittest.main()
unittest.main()
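The tests above pin down a small round-trip contract for feature_stats: numpy scalars come back as plain Python floats, per-dimension value lists are capped at 100 entries, and malformed entries are skipped with a warning. The standalone function below is a hypothetical sketch of that contract, not the actual ProgramDatabase._serialize_feature_stats implementation; in particular, keeping the first 100 samples is an assumption, since the test only asserts the length bound.

from typing import Any, Dict


def serialize_feature_stats(feature_stats: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    # Hypothetical sketch of the behavior asserted by the tests above
    serialized: Dict[str, Dict[str, Any]] = {}
    for dim, stats in feature_stats.items():
        serialized[dim] = {
            "min": float(stats["min"]),  # numpy scalars collapse to plain floats
            "max": float(stats["max"]),
            "values": [float(v) for v in stats["values"]][:100],  # keep at most 100 samples
        }
    return serialized


if __name__ == "__main__":
    stats = {"score": {"min": 0.1, "max": 0.9, "values": list(range(200))}}
    out = serialize_feature_stats(stats)
    assert len(out["score"]["values"]) <= 100
    assert all(isinstance(v, float) for v in out["score"]["values"])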