Skip to content

Commit 6d6d50e

Browse files
committed
Fix island initialization to use copies of best program
When initializing an empty island, a new copy of the best program is now created with a unique ID, rather than reusing the same program instance. This prevents a program from being assigned to multiple islands and ensures correct lineage tracking. Additional tests were added to verify correct migration behavior, unique program assignment per island, and proper handling of empty island initialization.
1 parent 4714754 commit 6d6d50e

File tree

2 files changed

+223
-8
lines changed

2 files changed

+223
-8
lines changed

openevolve/database.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import os
99
import random
1010
import time
11+
import uuid
1112
from dataclasses import asdict, dataclass, field, fields
1213

1314
# FileLock removed - no longer needed with threaded parallel processing
@@ -998,12 +999,29 @@ def _sample_exploration_parent(self) -> Program:
998999
if not current_island_programs:
9991000
# If current island is empty, initialize with best program or random program
10001001
if self.best_program_id and self.best_program_id in self.programs:
1001-
# Clone best program to current island
1002+
# Create a copy of best program for the empty island (don't reuse same ID)
10021003
best_program = self.programs[self.best_program_id]
1003-
self.islands[self.current_island].add(self.best_program_id)
1004-
best_program.metadata["island"] = self.current_island
1005-
logger.debug(f"Initialized empty island {self.current_island} with best program")
1006-
return best_program
1004+
copy_program = Program(
1005+
id=str(uuid.uuid4()),
1006+
code=best_program.code,
1007+
language=best_program.language,
1008+
parent_id=best_program.id,
1009+
generation=best_program.generation,
1010+
timestamp=time.time(),
1011+
iteration_found=self.last_iteration,
1012+
metrics=best_program.metrics.copy(),
1013+
complexity=best_program.complexity,
1014+
diversity=best_program.diversity,
1015+
metadata={"island": self.current_island},
1016+
artifacts_json=best_program.artifacts_json,
1017+
artifact_dir=best_program.artifact_dir,
1018+
)
1019+
self.programs[copy_program.id] = copy_program
1020+
self.islands[self.current_island].add(copy_program.id)
1021+
logger.debug(
1022+
f"Initialized empty island {self.current_island} with copy of best program"
1023+
)
1024+
return copy_program
10071025
else:
10081026
# Use any available program
10091027
return next(iter(self.programs.values()))
@@ -1026,10 +1044,29 @@ def _sample_exploration_parent(self) -> Program:
10261044
f"Island {self.current_island} has no valid programs after cleanup, reinitializing"
10271045
)
10281046
if self.best_program_id and self.best_program_id in self.programs:
1047+
# Create a copy of best program for the empty island (don't reuse same ID)
10291048
best_program = self.programs[self.best_program_id]
1030-
self.islands[self.current_island].add(self.best_program_id)
1031-
best_program.metadata["island"] = self.current_island
1032-
return best_program
1049+
copy_program = Program(
1050+
id=str(uuid.uuid4()),
1051+
code=best_program.code,
1052+
language=best_program.language,
1053+
parent_id=best_program.id,
1054+
generation=best_program.generation,
1055+
timestamp=time.time(),
1056+
iteration_found=self.last_iteration,
1057+
metrics=best_program.metrics.copy(),
1058+
complexity=best_program.complexity,
1059+
diversity=best_program.diversity,
1060+
metadata={"island": self.current_island},
1061+
artifacts_json=best_program.artifacts_json,
1062+
artifact_dir=best_program.artifact_dir,
1063+
)
1064+
self.programs[copy_program.id] = copy_program
1065+
self.islands[self.current_island].add(copy_program.id)
1066+
logger.debug(
1067+
f"Reinitialized empty island {self.current_island} with copy of best program"
1068+
)
1069+
return copy_program
10331070
else:
10341071
return next(iter(self.programs.values()))
10351072

tests/test_database.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import unittest
6+
import uuid
67
from openevolve.config import Config
78
from openevolve.database import Program, ProgramDatabase
89

@@ -457,6 +458,183 @@ def test_diversity_feature_integration(self):
457458
self.assertGreaterEqual(coord, 0)
458459
self.assertLess(coord, self.db.feature_bins)
459460

461+
def test_migration_prevents_re_migration(self):
    """Test that programs marked as migrants don't migrate again

    Seeds one program per island, flags the island-0 program as a migrant,
    runs a single migration pass, and then checks that no migrant copy was
    derived from the flagged program.
    """
    island_count = 3

    # Three-island in-memory database that is due for migration every generation
    config = Config()
    db_settings = config.database
    db_settings.in_memory = True
    db_settings.num_islands = 3
    db_settings.migration_interval = 1
    multi_db = ProgramDatabase(db_settings)

    # One program per island; the IDs deliberately avoid the word "migrant"
    # so the ID-pattern checks below only match programs created by migration
    for island_idx in range(island_count):
        seeded = Program(
            id=f"test_prog_{island_idx}",
            code=f"def test_{island_idx}(): return {island_idx}",
            language="python",
            metrics={"score": 0.5 + island_idx * 0.1},
        )
        multi_db.add(seeded, target_island=island_idx)

    # Flag the island-0 program as an already-migrated program
    flagged = multi_db.get("test_prog_0")
    flagged.metadata["migrant"] = True
    original_id = flagged.id

    # Before migration no stored ID carries the "_migrant_" marker
    ids_before = [pid for pid in multi_db.programs if "_migrant_" in pid]
    self.assertEqual(len(ids_before), 0)

    # Put every island at the migration interval, then migrate once
    for island_idx in range(island_count):
        multi_db.island_generations[island_idx] = db_settings.migration_interval
    multi_db.migrate_programs()

    # The flagged program must still be retrievable under its original ID
    self.assertIsNotNone(multi_db.get(original_id))

    # Programs created by migration are recognised by "_migrant_" in their ID.
    # Expectation encoded by this test: the 2 unflagged programs each migrate
    # to 2 neighbouring islands (2 * 2 = 4 copies) and the flagged program
    # contributes none.
    new_migrant_ids = [pid for pid in multi_db.programs if "_migrant_" in pid]
    self.assertEqual(len(new_migrant_ids), 4)

    # None of the new copies may be derived from the flagged program's ID
    migrant_descendants = [pid for pid in new_migrant_ids if original_id in pid]
    self.assertEqual(
        len(migrant_descendants),
        0,
        f"Program {original_id} should not have created migrant copies",
    )
514+
515+
def test_empty_island_initialization_creates_copies(self):
    """Test that empty islands are initialized with copies, not shared references

    A single program lives on island 1 and is made the best program. Sampling
    from the empty island 0 must return a new program with a different ID and
    the same code, whose parent is the original, leaving the original on
    island 1 only.
    """
    source_id = "original_program"

    # Three-island in-memory database; exploration-only sampling so that the
    # empty-island handling is the path exercised by sample()
    config = Config()
    db_settings = config.database
    db_settings.in_memory = True
    db_settings.num_islands = 3
    db_settings.exploration_ratio = 1.0
    db_settings.exploitation_ratio = 0.0
    multi_db = ProgramDatabase(db_settings)

    # The only program in the database goes to island 1 and becomes the best
    program = Program(
        id=source_id,
        code="def original(): return 42",
        language="python",
        metrics={"score": 0.9, "combined_score": 0.9},
    )
    multi_db.add(program, target_island=1)
    multi_db.best_program_id = source_id

    # Sample while the (empty) island 0 is current
    multi_db.set_current_island(0)
    sampled_parent, _ = multi_db.sample()

    # A copy is expected: new ID, identical code, lineage pointing at the original
    self.assertNotEqual(sampled_parent.id, "original_program")
    self.assertEqual(sampled_parent.code, program.code)
    self.assertEqual(sampled_parent.parent_id, "original_program")

    # Island membership: original stays on island 1, the copy lands on island 0
    home_island = multi_db.islands[1]
    initialised_island = multi_db.islands[0]
    self.assertIn("original_program", home_island)
    self.assertNotIn("original_program", initialised_island)
    self.assertIn(sampled_parent.id, initialised_island)

    # Run validation - should not raise any errors
    multi_db._validate_migration_results()
554+
555+
def test_no_program_assigned_to_multiple_islands(self):
    """Test that programs are never assigned to multiple islands

    Only islands 2 and 3 are seeded, so islands 0 and 1 are genuinely empty
    when they are sampled. (Seeding every island, as this test previously
    did, makes the empty-island sampling step unreachable and the test pass
    vacuously.) After sampling from each empty island, no program ID may
    appear in more than one island.
    """
    best_id = "island_test_3"
    seeded_islands = (2, 3)

    # Create database with multiple islands
    config = Config()
    config.database.in_memory = True
    config.database.num_islands = 4
    # Force exploration mode to test empty island handling
    config.database.exploration_ratio = 1.0
    config.database.exploitation_ratio = 0.0
    multi_db = ProgramDatabase(config.database)

    # Add programs to a subset of the islands, leaving the others empty
    for i in seeded_islands:
        program = Program(
            id=f"island_test_{i}",
            code=f"def test_{i}(): return {i}",
            language="python",
            metrics={"score": 0.5 + i * 0.1, "combined_score": 0.5 + i * 0.1},
        )
        multi_db.add(program, target_island=i)

    # Make the best program the one from island 3
    multi_db.best_program_id = best_id

    # Guard against a vacuous test: the unseeded islands must really be empty
    empty_islands = [
        island_idx
        for island_idx, island_programs in enumerate(multi_db.islands)
        if len(island_programs) == 0
    ]
    self.assertEqual(empty_islands, [0, 1])

    # Sample from each empty island - this should create copies of the best
    # program rather than re-registering the best program's own ID
    for empty_island in empty_islands:
        multi_db.set_current_island(empty_island)
        parent, _ = multi_db.sample()
        self.assertNotEqual(parent.id, best_id)
        self.assertIn(parent.id, multi_db.islands[empty_island])
        self.assertNotIn(best_id, multi_db.islands[empty_island])

    # Check that no program ID appears in multiple islands
    all_island_programs = {}
    for island_idx, island_programs in enumerate(multi_db.islands):
        for program_id in island_programs:
            if program_id in all_island_programs:
                self.fail(
                    f"Program {program_id} found in both island {all_island_programs[program_id]} "
                    f"and island {island_idx}"
                )
            all_island_programs[program_id] = island_idx

    # Run validation - should not raise any errors
    multi_db._validate_migration_results()
597+
598+
def test_migration_validation_passes(self):
    """Test that migration validation passes after our fixes

    Seeds six programs round-robin across three islands, runs three
    generation-bump / migrate / validate cycles, and finally checks that no
    stored program ID contains "migrant" more than once.
    """
    island_count = 3

    # Three-island in-memory database that migrates every generation
    config = Config()
    db_settings = config.database
    db_settings.in_memory = True
    db_settings.num_islands = 3
    db_settings.migration_interval = 1
    multi_db = ProgramDatabase(db_settings)

    # Six programs, distributed round-robin (i % 3) over the islands
    for i in range(6):
        score = 0.4 + i * 0.1
        multi_db.add(
            Program(
                id=f"test_program_{i}",
                code=f"def test_{i}(): return {i * 2}",
                language="python",
                metrics={"score": score, "combined_score": score},
            ),
            target_island=i % 3,
        )

    # Three migration cycles
    for _ in range(3):
        # Advance every island by one generation so migration is due
        for island in range(island_count):
            multi_db.island_generations[island] += 1

        multi_db.migrate_programs()

        # Validation is run after every cycle, not just at the end
        multi_db._validate_migration_results()

    # An ID containing "migrant" more than once would indicate a migrant copy
    # that was itself migrated again
    for program_id in multi_db.programs:
        self.assertLessEqual(
            program_id.count("migrant"),
            1,
            f"Program ID {program_id} has been migrated multiple times",
        )
637+
460638

461639
if __name__ == "__main__":
462640
unittest.main()

0 commit comments

Comments
 (0)