Skip to content

Commit 4e2b2f6

Browse files
committed
Skip migration of programs with duplicate code
Prevents migration of a program to a target island if a program with identical code already exists on that island. Adds a test to verify that duplicate code is not migrated, ensuring no redundant programs are created during migration.
1 parent 5dd0434 commit 4e2b2f6

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

openevolve/database.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1658,6 +1658,20 @@ def migrate_programs(self) -> None:
16581658
continue
16591659

16601660
for target_island in target_islands:
1661+
# Skip migration if target island already has a program with identical code
1662+
# Identical code is expected to produce identical metrics when evaluation is deterministic, so migrating a copy would be redundant
1663+
target_island_programs = [
1664+
self.programs[pid] for pid in self.islands[target_island]
1665+
if pid in self.programs
1666+
]
1667+
has_duplicate_code = any(p.code == migrant.code for p in target_island_programs)
1668+
1669+
if has_duplicate_code:
1670+
logger.debug(
1671+
f"Skipping migration of program {migrant.id[:8]} to island {target_island} "
1672+
f"(duplicate code already exists)"
1673+
)
1674+
continue
16611675
# Create a copy for migration with simple new UUID
16621676
import uuid
16631677
migrant_copy = Program(

tests/test_migration_no_duplicates.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,72 @@ def test_migration_uses_map_elites_deduplication(self):
334334
self.assertEqual(migrant_program.metrics["combined_score"], 0.9,
335335
"Migrant should preserve high score")
336336

337+
def test_migration_skips_duplicate_code_on_target_island(self):
    """Test that migration skips programs if target island already has identical code.

    Island 0 and island 1 each receive one program whose ``code`` string is
    identical (the scores differ). After ``migrate_programs()`` runs,
    island 1 must still hold exactly one program with that code, it must be
    the original ``prog_island_1``, and the island's population size must
    not have changed.
    """
    shared_code = "def shared_code(): return 42"

    def live_programs(island_idx):
        # Resolve the island's program IDs, skipping any ID that is not
        # present in self.db.programs.
        return [
            self.db.programs[pid]
            for pid in self.db.islands[island_idx]
            if pid in self.db.programs
        ]

    # Create a program on island 0
    prog_island_0 = Program(
        id="prog_island_0",
        code=shared_code,  # This code will be on both islands
        language="python",
        metrics={
            "complexity": 50.0,
            "diversity": 30.0,
            "score": 0.8,
            "combined_score": 0.8,
        },
        metadata={"island": 0, "generation": 3},
    )
    self.db.add(prog_island_0)

    # Create a program with IDENTICAL CODE on island 1 (target island)
    prog_island_1 = Program(
        id="prog_island_1",
        code=shared_code,  # Same exact code
        language="python",
        metrics={
            "complexity": 50.0,
            "diversity": 30.0,
            "score": 0.7,  # Different score, but same code
            "combined_score": 0.7,
        },
        metadata={"island": 1, "generation": 3},
    )
    self.db.add(prog_island_1, target_island=1)

    # Set generations to trigger migration
    self.db.island_generations[0] = 3
    self.db.island_generations[1] = 3

    # Snapshot island 1's population size before migration
    island_1_before = len(live_programs(1))

    # Trigger migration (island 0 should try to migrate to island 1)
    self.db.migrate_programs()

    # Inspect island 1 after migration
    island_1_programs = live_programs(1)
    island_1_after = len(island_1_programs)
    shared_code_programs = [p for p in island_1_programs if p.code == shared_code]
    shared_code_count = len(shared_code_programs)

    # Migration must be skipped because identical code already exists on
    # island 1, so only the original prog_island_1 carries the shared code.
    self.assertEqual(shared_code_count, 1,
                     f"Should not migrate duplicate code - found {shared_code_count} programs with identical code on island 1")

    # The single remaining program must be the original, not a migrant copy.
    # No guard is needed here: the assertion above already raised otherwise.
    self.assertEqual(shared_code_programs[0].id, "prog_island_1",
                     "Original program should remain, no migrant copy needed")

    # The before/after counts were previously computed but never checked.
    # NOTE(review): assumes the fixture database holds no other programs
    # that could legitimately migrate to island 1 -- confirm against setUp.
    self.assertEqual(island_1_after, island_1_before,
                     "Island 1 population should be unchanged when the migrant is a duplicate")
337403

338404
if __name__ == '__main__':
339405
unittest.main()

0 commit comments

Comments
 (0)