Skip to content

Commit 930eaa5

Browse files
committed
Add MAP-Elites logging, island migration validation, and tests
Enhanced ProgramDatabase with detailed MAP-Elites cell logging, coverage milestones, and cell improvement events. Added validation and cleanup for island best program tracking and migration consistency. Improved Evaluator error context for cascade failures. Added comprehensive tests for cascade validation, island migration, and island best program tracking.
1 parent 545557a commit 930eaa5

File tree

6 files changed

+1158
-8
lines changed

6 files changed

+1158
-8
lines changed

openevolve/database.py

Lines changed: 122 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -189,6 +189,28 @@ def add(
189189
should_replace = self._is_better(program, self.programs[existing_program_id])
190190

191191
if should_replace:
192+
# Log significant MAP-Elites events
193+
coords_dict = {self.config.feature_dimensions[i]: feature_coords[i] for i in range(len(feature_coords))}
194+
195+
if feature_key not in self.feature_map:
196+
# New cell occupation
197+
logging.info("New MAP-Elites cell occupied: %s", coords_dict)
198+
# Check coverage milestone
199+
total_possible_cells = self.feature_bins ** len(self.config.feature_dimensions)
200+
coverage = (len(self.feature_map) + 1) / total_possible_cells
201+
if coverage in [0.1, 0.25, 0.5, 0.75, 0.9]:
202+
logging.info("MAP-Elites coverage reached %.1f%% (%d/%d cells)",
203+
coverage * 100, len(self.feature_map) + 1, total_possible_cells)
204+
else:
205+
# Cell replacement - existing program being replaced
206+
existing_program_id = self.feature_map[feature_key]
207+
if existing_program_id in self.programs:
208+
existing_program = self.programs[existing_program_id]
209+
new_fitness = safe_numeric_average(program.metrics)
210+
existing_fitness = safe_numeric_average(existing_program.metrics)
211+
logging.info("MAP-Elites cell improved: %s (fitness: %.3f -> %.3f)",
212+
coords_dict, existing_fitness, new_fitness)
213+
192214
self.feature_map[feature_key] = program.id
193215

194216
# Add to specific island (not random!)
@@ -515,6 +537,9 @@ def _reconstruct_islands(self, saved_islands: List[List[str]]) -> None:
515537
feature_keys_to_remove.append(key)
516538
for key in feature_keys_to_remove:
517539
del self.feature_map[key]
540+
541+
# Clean up island best programs - remove stale references
542+
self._cleanup_stale_island_bests()
518543

519544
# Check best program
520545
if self.best_program_id and self.best_program_id not in self.programs:
@@ -641,7 +666,8 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
641666
else:
642667
# Default to middle bin if feature not found
643668
coords.append(self.feature_bins // 2)
644-
logging.info(
669+
# Only log coordinates at debug level for troubleshooting
670+
logging.debug(
645671
"MAP-Elites coords: %s",
646672
str({self.config.feature_dimensions[i]: coords[i] for i in range(len(coords))}),
647673
)
@@ -1138,6 +1164,9 @@ def _enforce_population_limit(self, exclude_program_id: Optional[str] = None) ->
11381164
logger.debug(f"Removed program {program_id} due to population limit")
11391165

11401166
logger.info(f"Population size after cleanup: {len(self.programs)}")
1167+
1168+
# Clean up any stale island best program references after removal
1169+
self._cleanup_stale_island_bests()
11411170

11421171
# Island management methods
11431172
def set_current_island(self, island_idx: int) -> None:
@@ -1215,13 +1244,102 @@ def migrate_programs(self) -> None:
12151244
# Update island-specific best program if migrant is better
12161245
self._update_island_best_program(migrant_copy, target_island)
12171246

1218-
logger.debug(
1219-
f"Migrated program {migrant.id} from island {i} to island {target_island}"
1220-
)
1247+
# Log migration with MAP-Elites coordinates
1248+
feature_coords = self._calculate_feature_coords(migrant_copy)
1249+
coords_dict = {self.config.feature_dimensions[j]: feature_coords[j] for j in range(len(feature_coords))}
1250+
logger.info("Program migrated to island %d at MAP-Elites coords: %s",
1251+
target_island, coords_dict)
12211252

12221253
# Update last migration generation
12231254
self.last_migration_generation = max(self.island_generations)
12241255
logger.info(f"Migration completed at generation {self.last_migration_generation}")
1256+
1257+
# Validate migration results
1258+
self._validate_migration_results()
1259+
1260+
def _validate_migration_results(self) -> None:
    """
    Report island bookkeeping inconsistencies after a migration pass.

    This method only logs; it never modifies the database. It reports:

    - a program id that appears a second time while walking the islands
      (logged as an error, remaining checks for that entry are skipped)
    - an island entry whose id is not present in ``self.programs``
    - a program whose ``metadata["island"]`` differs from the index of the
      island that holds it
    - an island best program that is missing from ``self.programs`` or is
      not a member of its own island
    """
    already_placed = set()

    for island_idx, members in enumerate(self.islands):
        for pid in members:
            # A repeated id means the program was placed more than once.
            if pid in already_placed:
                logger.error(f"Program {pid} assigned to multiple islands")
                continue
            already_placed.add(pid)

            # The island may reference a program that is gone from the database.
            if pid not in self.programs:
                logger.warning(f"Island {island_idx} contains nonexistent program {pid}")
                continue

            # The island recorded on the program must match where it lives.
            recorded_island = self.programs[pid].metadata.get("island")
            if recorded_island == island_idx:
                continue
            logger.warning(
                f"Island mismatch for program {pid}: "
                f"in island {island_idx} but metadata says {recorded_island}"
            )

    # Each recorded island best must exist and belong to that island.
    for island_idx, champion_id in enumerate(self.island_best_programs):
        if champion_id is None:
            continue
        if champion_id not in self.programs:
            logger.warning(f"Island {island_idx} best program {champion_id} does not exist")
        elif champion_id not in self.islands[island_idx]:
            logger.warning(f"Island {island_idx} best program {champion_id} not in island")
1300+
1301+
def _cleanup_stale_island_bests(self) -> None:
    """
    Drop invalid island best references, then refill empty slots.

    Pass 1 sets ``self.island_best_programs[i]`` to ``None`` when the
    recorded id is missing from ``self.programs`` or is not a member of
    ``self.islands[i]``.

    Pass 2 visits every island whose best slot is ``None`` (whether it was
    just cleared or was already empty) and, if the island holds at least one
    program still present in ``self.programs``, records the member with the
    highest score. The score is ``metrics["combined_score"]`` when that key
    exists, otherwise ``safe_numeric_average(metrics)``.
    """
    cleared = 0

    for island_idx, champion_id in enumerate(self.island_best_programs):
        if champion_id is None:
            continue

        if champion_id not in self.programs:
            logger.debug(
                f"Clearing stale island {island_idx} best program {champion_id} (program deleted)"
            )
        elif champion_id not in self.islands[island_idx]:
            logger.debug(
                f"Clearing stale island {island_idx} best program {champion_id} (not in island)"
            )
        else:
            # Reference is still valid; leave it alone.
            continue

        self.island_best_programs[island_idx] = None
        cleared += 1

    if cleared > 0:
        logger.info(f"Cleaned up {cleared} stale island best program references")

    def _score(candidate):
        # The fallback average is computed on every call, as the default
        # argument to ``dict.get`` is evaluated eagerly.
        return candidate.metrics.get(
            "combined_score", safe_numeric_average(candidate.metrics)
        )

    # Refill any island that currently has no recorded best program.
    for island_idx, champion_id in enumerate(self.island_best_programs):
        if champion_id is not None or len(self.islands[island_idx]) == 0:
            continue

        # Ignore member ids that no longer resolve to a stored program.
        candidates = [
            self.programs[pid] for pid in self.islands[island_idx] if pid in self.programs
        ]
        if not candidates:
            continue

        replacement = max(candidates, key=_score)
        self.island_best_programs[island_idx] = replacement.id
        logger.debug(f"Recalculated island {island_idx} best program: {replacement.id}")
12251343

12261344
def get_island_stats(self) -> List[dict]:
12271345
"""Get statistics for each island"""

openevolve/evaluator.py

Lines changed: 29 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -383,13 +383,14 @@ async def run_stage1():
383383
)
384384
except Exception as e:
385385
logger.error(f"Error in stage 1 evaluation: {str(e)}")
386-
# Capture stage 1 failure as artifacts
386+
# Capture stage 1 failure with enhanced context
387+
error_context = self._create_cascade_error_context("stage1", e)
387388
return EvaluationResult(
388389
metrics={"stage1_passed": 0.0, "error": 0.0},
389390
artifacts={
390391
"stderr": str(e),
391392
"traceback": traceback.format_exc(),
392-
"failure_stage": "stage1",
393+
**error_context,
393394
},
394395
)
395396

@@ -510,13 +511,14 @@ async def run_stage3():
510511

511512
except Exception as e:
512513
logger.error(f"Error in cascade evaluation: {str(e)}")
513-
# Return proper cascade failure result instead of re-raising
514+
# Return proper cascade failure result with enhanced context
515+
error_context = self._create_cascade_error_context("cascade_setup", e)
514516
return EvaluationResult(
515517
metrics={"stage1_passed": 0.0, "error": 0.0},
516518
artifacts={
517519
"stderr": str(e),
518520
"traceback": traceback.format_exc(),
519-
"failure_stage": "cascade_setup",
521+
**error_context,
520522
},
521523
)
522524

@@ -611,6 +613,29 @@ async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[s
611613
traceback.print_exc()
612614
return {}
613615

616+
def _create_cascade_error_context(self, stage: str, error: Exception) -> dict:
    """
    Build a diagnostic dictionary describing a cascade evaluation failure.

    Args:
        stage: Label of the stage in which the failure happened
        error: The exception that was caught

    Returns:
        Dictionary with the keys ``failure_stage``, ``error_type``,
        ``error_message``, ``timestamp``, ``cascade_config``,
        ``cascade_thresholds``, ``timeout_config`` and ``evaluation_file``
    """
    import time

    # What failed and where.
    context = {
        "failure_stage": stage,
        "error_type": type(error).__name__,
        "error_message": str(error),
    }

    # When it failed (seconds since the epoch, per time.time()).
    context["timestamp"] = time.time()

    # Evaluator settings in effect at the time of the failure.
    # ``cascade_thresholds`` falls back to an empty list when the config
    # object has no such attribute.
    context["cascade_config"] = self.config.cascade_evaluation
    context["cascade_thresholds"] = getattr(self.config, "cascade_thresholds", [])
    context["timeout_config"] = self.config.timeout
    context["evaluation_file"] = self.evaluation_file

    return context
638+
614639
def _passes_threshold(self, metrics: Dict[str, float], threshold: float) -> bool:
615640
"""
616641
Check if metrics pass a threshold

0 commit comments

Comments
 (0)