Skip to content

Commit 16a7c96

Browse files
authored
Merge pull request algorithmicsuperintelligence#250 from codelion/fix-process-timeout
fix timeout
2 parents: 21b295c + fcf54ff — commit 16a7c96

11 files changed: +342 additions, −414 deletions

openevolve/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Version information for openevolve package."""
22

3-
__version__ = "0.2.10"
3+
__version__ = "0.2.11"

openevolve/process_parallel.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pickle
99
import signal
1010
import time
11-
from concurrent.futures import ProcessPoolExecutor, Future
11+
from concurrent.futures import ProcessPoolExecutor, Future, TimeoutError as FutureTimeoutError
1212
from dataclasses import dataclass, asdict
1313
from pathlib import Path
1414
from typing import Any, Dict, List, Optional, Tuple
@@ -454,7 +454,9 @@ async def run_evolution(
454454
future = pending_futures.pop(completed_iteration)
455455

456456
try:
457-
result = future.result()
457+
# Use evaluator timeout + buffer to gracefully handle stuck processes
458+
timeout_seconds = self.config.evaluator.timeout + 30
459+
result = future.result(timeout=timeout_seconds)
458460

459461
if result.error:
460462
logger.warning(f"Iteration {completed_iteration} error: {result.error}")
@@ -612,6 +614,14 @@ async def run_evolution(
612614
)
613615
break
614616

617+
except FutureTimeoutError:
618+
logger.error(
619+
f"⏰ Iteration {completed_iteration} timed out after {timeout_seconds}s "
620+
f"(evaluator timeout: {self.config.evaluator.timeout}s + 30s buffer). "
621+
f"Canceling future and continuing with next iteration."
622+
)
623+
# Cancel the future to clean up the process
624+
future.cancel()
615625
except Exception as e:
616626
logger.error(f"Error processing result from iteration {completed_iteration}: {e}")
617627

tests/test_checkpoint_resume.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,9 @@ async def run_test():
308308
)
309309

310310
# Mock the parallel controller to avoid API calls
311-
with patch("openevolve.controller.ProcessParallelController") as mock_parallel_class:
311+
with patch(
312+
"openevolve.controller.ProcessParallelController"
313+
) as mock_parallel_class:
312314
mock_parallel = MagicMock()
313315
mock_parallel.run_evolution = AsyncMock(return_value=None)
314316
mock_parallel.start = MagicMock()

tests/test_feature_stats_persistence.py

Lines changed: 59 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def setUp(self):
2222
self.config = DatabaseConfig(
2323
db_path=self.test_dir,
2424
feature_dimensions=["score", "custom_metric1", "custom_metric2"],
25-
feature_bins=10
25+
feature_bins=10,
2626
)
2727

2828
def tearDown(self):
@@ -33,7 +33,7 @@ def test_feature_stats_saved_and_loaded(self):
3333
"""Test that feature_stats are correctly saved and loaded from checkpoints"""
3434
# Create database and add programs to build feature_stats
3535
db1 = ProgramDatabase(self.config)
36-
36+
3737
programs = []
3838
for i in range(5):
3939
program = Program(
@@ -42,8 +42,8 @@ def test_feature_stats_saved_and_loaded(self):
4242
metrics={
4343
"combined_score": 0.1 + i * 0.2,
4444
"custom_metric1": 10 + i * 20,
45-
"custom_metric2": 100 + i * 50
46-
}
45+
"custom_metric2": 100 + i * 50,
46+
},
4747
)
4848
programs.append(program)
4949
db1.add(program)
@@ -52,14 +52,10 @@ def test_feature_stats_saved_and_loaded(self):
5252
self.assertIn("score", db1.feature_stats)
5353
self.assertIn("custom_metric1", db1.feature_stats)
5454
self.assertIn("custom_metric2", db1.feature_stats)
55-
55+
5656
# Store original feature_stats for comparison
5757
original_stats = {
58-
dim: {
59-
"min": stats["min"],
60-
"max": stats["max"],
61-
"values": stats["values"].copy()
62-
}
58+
dim: {"min": stats["min"], "max": stats["max"], "values": stats["values"].copy()}
6359
for dim, stats in db1.feature_stats.items()
6460
}
6561

@@ -72,33 +68,33 @@ def test_feature_stats_saved_and_loaded(self):
7268

7369
# Verify feature_stats were loaded correctly
7470
self.assertEqual(len(db2.feature_stats), len(original_stats))
75-
71+
7672
for dim, original in original_stats.items():
7773
self.assertIn(dim, db2.feature_stats)
7874
loaded = db2.feature_stats[dim]
79-
75+
8076
self.assertAlmostEqual(loaded["min"], original["min"], places=5)
8177
self.assertAlmostEqual(loaded["max"], original["max"], places=5)
8278
self.assertEqual(loaded["values"], original["values"])
8379

8480
def test_empty_feature_stats_handling(self):
8581
"""Test handling of empty feature_stats"""
8682
db1 = ProgramDatabase(self.config)
87-
83+
8884
# Save without any programs (empty feature_stats)
8985
db1.save(self.test_dir, iteration=1)
90-
86+
9187
# Load and verify
9288
db2 = ProgramDatabase(self.config)
9389
db2.load(self.test_dir)
94-
90+
9591
self.assertEqual(db2.feature_stats, {})
9692

9793
def test_backward_compatibility_missing_feature_stats(self):
9894
"""Test loading checkpoints that don't have feature_stats (backward compatibility)"""
9995
# Create a checkpoint manually without feature_stats
10096
os.makedirs(self.test_dir, exist_ok=True)
101-
97+
10298
# Create metadata without feature_stats (simulating old checkpoint)
10399
metadata = {
104100
"feature_map": {},
@@ -112,60 +108,48 @@ def test_backward_compatibility_missing_feature_stats(self):
112108
"last_migration_generation": 0,
113109
# Note: no "feature_stats" key
114110
}
115-
111+
116112
with open(os.path.join(self.test_dir, "metadata.json"), "w") as f:
117113
json.dump(metadata, f)
118-
114+
119115
# Load should work without errors
120116
db = ProgramDatabase(self.config)
121117
db.load(self.test_dir)
122-
118+
123119
# feature_stats should be empty but not None
124120
self.assertEqual(db.feature_stats, {})
125121

126122
def test_feature_stats_serialization_edge_cases(self):
127123
"""Test feature_stats serialization handles edge cases correctly"""
128124
db = ProgramDatabase(self.config)
129-
125+
130126
# Test with various edge cases
131127
db.feature_stats = {
132-
"normal_case": {
133-
"min": 1.0,
134-
"max": 10.0,
135-
"values": [1.0, 5.0, 10.0]
136-
},
137-
"single_value": {
138-
"min": 5.0,
139-
"max": 5.0,
140-
"values": [5.0]
141-
},
128+
"normal_case": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]},
129+
"single_value": {"min": 5.0, "max": 5.0, "values": [5.0]},
142130
"large_values_list": {
143131
"min": 0.0,
144132
"max": 200.0,
145-
"values": list(range(200)) # Should be truncated to 100
133+
"values": list(range(200)), # Should be truncated to 100
146134
},
147-
"empty_values": {
148-
"min": 0.0,
149-
"max": 1.0,
150-
"values": []
151-
}
135+
"empty_values": {"min": 0.0, "max": 1.0, "values": []},
152136
}
153-
137+
154138
# Test serialization
155139
serialized = db._serialize_feature_stats()
156-
140+
157141
# Check that large values list was truncated
158142
self.assertLessEqual(len(serialized["large_values_list"]["values"]), 100)
159-
143+
160144
# Test deserialization
161145
deserialized = db._deserialize_feature_stats(serialized)
162-
146+
163147
# Verify structure is maintained
164148
self.assertIn("normal_case", deserialized)
165149
self.assertIn("single_value", deserialized)
166150
self.assertIn("large_values_list", deserialized)
167151
self.assertIn("empty_values", deserialized)
168-
152+
169153
# Verify types are correct
170154
for dim, stats in deserialized.items():
171155
self.assertIsInstance(stats["min"], float)
@@ -176,29 +160,26 @@ def test_feature_stats_preservation_during_load(self):
176160
"""Test that feature_stats ranges are preserved when loading from checkpoint"""
177161
# Create database with programs
178162
db1 = ProgramDatabase(self.config)
179-
163+
180164
test_programs = []
181-
165+
182166
for i in range(3):
183167
program = Program(
184168
id=f"stats_test_{i}",
185169
code=f"# Stats test {i}",
186170
metrics={
187171
"combined_score": 0.2 + i * 0.3,
188172
"custom_metric1": 20 + i * 30,
189-
"custom_metric2": 200 + i * 100
190-
}
173+
"custom_metric2": 200 + i * 100,
174+
},
191175
)
192176
test_programs.append(program)
193177
db1.add(program)
194178

195179
# Record original feature ranges
196180
original_ranges = {}
197181
for dim, stats in db1.feature_stats.items():
198-
original_ranges[dim] = {
199-
"min": stats["min"],
200-
"max": stats["max"]
201-
}
182+
original_ranges[dim] = {"min": stats["min"], "max": stats["max"]}
202183

203184
# Save checkpoint
204185
db1.save(self.test_dir, iteration=50)
@@ -211,31 +192,35 @@ def test_feature_stats_preservation_during_load(self):
211192
for dim, original_range in original_ranges.items():
212193
self.assertIn(dim, db2.feature_stats)
213194
loaded_stats = db2.feature_stats[dim]
214-
195+
215196
self.assertAlmostEqual(
216-
loaded_stats["min"], original_range["min"], places=5,
217-
msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}"
197+
loaded_stats["min"],
198+
original_range["min"],
199+
places=5,
200+
msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}",
218201
)
219202
self.assertAlmostEqual(
220-
loaded_stats["max"], original_range["max"], places=5,
221-
msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}"
203+
loaded_stats["max"],
204+
original_range["max"],
205+
places=5,
206+
msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}",
222207
)
223-
208+
224209
# Test that adding a new program within existing ranges doesn't break anything
225210
new_program = Program(
226211
id="range_test",
227212
code="# Program to test range stability",
228213
metrics={
229214
"combined_score": 0.35, # Within existing range
230-
"custom_metric1": 35, # Within existing range
231-
"custom_metric2": 250 # Within existing range
232-
}
215+
"custom_metric1": 35, # Within existing range
216+
"custom_metric2": 250, # Within existing range
217+
},
233218
)
234-
219+
235220
# Adding this program should not cause issues
236221
db2.add(new_program)
237222
new_coords = db2._calculate_feature_coords(new_program)
238-
223+
239224
# Should get valid coordinates
240225
self.assertEqual(len(new_coords), len(self.config.feature_dimensions))
241226
for coord in new_coords:
@@ -245,25 +230,25 @@ def test_feature_stats_preservation_during_load(self):
245230
def test_feature_stats_with_numpy_types(self):
246231
"""Test that numpy types are correctly handled in serialization"""
247232
import numpy as np
248-
233+
249234
db = ProgramDatabase(self.config)
250-
235+
251236
# Simulate feature_stats with numpy types
252237
db.feature_stats = {
253238
"numpy_test": {
254239
"min": np.float64(1.5),
255240
"max": np.float64(9.5),
256-
"values": [np.float64(x) for x in [1.5, 5.0, 9.5]]
241+
"values": [np.float64(x) for x in [1.5, 5.0, 9.5]],
257242
}
258243
}
259-
244+
260245
# Test serialization doesn't fail
261246
serialized = db._serialize_feature_stats()
262-
247+
263248
# Verify numpy types were converted to Python types
264249
self.assertIsInstance(serialized["numpy_test"]["min"], float)
265250
self.assertIsInstance(serialized["numpy_test"]["max"], float)
266-
251+
267252
# Test deserialization
268253
deserialized = db._deserialize_feature_stats(serialized)
269254
self.assertIsInstance(deserialized["numpy_test"]["min"], float)
@@ -272,32 +257,28 @@ def test_feature_stats_with_numpy_types(self):
272257
def test_malformed_feature_stats_handling(self):
273258
"""Test handling of malformed feature_stats during deserialization"""
274259
db = ProgramDatabase(self.config)
275-
260+
276261
# Test with malformed data
277262
malformed_data = {
278-
"valid_entry": {
279-
"min": 1.0,
280-
"max": 10.0,
281-
"values": [1.0, 5.0, 10.0]
282-
},
263+
"valid_entry": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]},
283264
"invalid_entry": "this is not a dict",
284265
"missing_keys": {
285266
"min": 1.0
286267
# missing "max" and "values"
287-
}
268+
},
288269
}
289-
290-
with patch('openevolve.database.logger') as mock_logger:
270+
271+
with patch("openevolve.database.logger") as mock_logger:
291272
deserialized = db._deserialize_feature_stats(malformed_data)
292-
273+
293274
# Should have valid entry and skip invalid ones
294275
self.assertIn("valid_entry", deserialized)
295276
self.assertNotIn("invalid_entry", deserialized)
296277
self.assertIn("missing_keys", deserialized) # Should be created with defaults
297-
278+
298279
# Should have logged warning for invalid entry
299280
mock_logger.warning.assert_called()
300281

301282

302283
if __name__ == "__main__":
303-
unittest.main()
284+
unittest.main()

0 commit comments

Comments (0)