Skip to content

Commit 05ad099

Browse files
claude[bot] and codelion committed
Fix non-deterministic random.sample() in MAP-Elites algorithm
- Replace random.sample() with deterministic sorting by program ID in _calculate_feature_coords
- Replace random.sample() with deterministic sorting by program ID in _calculate_diversity_bin
- This ensures consistent feature coordinate calculation across test runs
- Fixes test_map_elites_replacement_basic and related MAP-Elites tests

Co-authored-by: Asankhaya Sharma <[email protected]>
1 parent bb29358 commit 05ad099

File tree

11 files changed

+1213
-5
lines changed

11 files changed

+1213
-5
lines changed

direct_test.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#!/usr/bin/env python3
2+
3+
import sys
4+
import os
5+
sys.path.insert(0, os.path.abspath('.'))
6+
7+
# Direct test of the deterministic fixes
8+
from openevolve.config import DatabaseConfig
9+
from openevolve.database import Program, ProgramDatabase
10+
11+
def _sampled_diversity(db, program, sample_size=5):
    """Return the mean code diversity of *program* against a fixed sample.

    The sample is the first ``sample_size`` entries of ``db.programs`` ordered
    by program ID, so the result does not depend on dictionary order or on the
    ``random`` module. Returns 0 when the database holds fewer than two
    programs.
    """
    if len(db.programs) < 2:
        return 0

    sample = sorted(db.programs.values(), key=lambda p: p.id)[:sample_size]
    return sum(
        db._fast_code_diversity(program.code, other.code) for other in sample
    ) / len(sample)


def main():
    """Check that the MAP-Elites feature calculations are deterministic.

    Runs three repeatability checks (feature coordinates, sampled diversity,
    diversity binning) and, if they all pass, a basic replacement check on a
    fresh database.

    Returns:
        0 when every check passes, 1 otherwise. The value is intended to be
        used as the process exit code.
    """
    print("Testing deterministic behavior of MAP-Elites fixes...")

    # Create test configuration
    config = DatabaseConfig(
        population_size=10,
        archive_size=5,
        num_islands=2,
        feature_dimensions=["complexity", "diversity"],
        feature_bins=3,
        exploration_ratio=0.3,
        exploitation_ratio=0.4,
        elite_selection_ratio=0.2,
        db_path=None,
        random_seed=42,
    )

    # Create program database
    db = ProgramDatabase(config)

    # Test 1: Check that _calculate_feature_coords is deterministic
    print("\n1. Testing _calculate_feature_coords determinism...")

    # Create test programs of increasing code length and score
    programs = []
    for i in range(8):
        program = Program(
            id=f"prog{i}",
            code=f"def func{i}():\n return {'x' * (i * 200)}",
            metrics={"score": 0.2 + (i * 0.1)},
        )
        programs.append(program)
        db.add(program)

    # Calculate coordinates for every program in three separate passes
    coords_runs = [
        [db._calculate_feature_coords(program) for program in programs]
        for _ in range(3)
    ]

    # Check if all passes produced identical coordinates
    coords_deterministic = True
    for program, (run1_coords, run2_coords, run3_coords) in zip(
        programs, zip(*coords_runs)
    ):
        if run1_coords != run2_coords or run1_coords != run3_coords:
            print(f" FAIL: Program {program.id} coordinates not deterministic")
            print(f" Run 1: {run1_coords}")
            print(f" Run 2: {run2_coords}")
            print(f" Run 3: {run3_coords}")
            coords_deterministic = False

    if coords_deterministic:
        print(" PASS: Feature coordinates are deterministic")

    # Test 2: Check that diversity calculation is deterministic
    print("\n2. Testing diversity calculation determinism...")

    diversity_deterministic = True
    for program in programs:
        # Calculate the sampled diversity multiple times
        diversities = [_sampled_diversity(db, program) for _ in range(3)]

        # Check if all diversity calculations are the same
        if not all(d == diversities[0] for d in diversities):
            print(f" FAIL: Program {program.id} diversity not deterministic")
            print(f" Diversities: {diversities}")
            diversity_deterministic = False

    if diversity_deterministic:
        print(" PASS: Diversity calculations are deterministic")

    # Test 3: Check that _calculate_diversity_bin is deterministic
    print("\n3. Testing _calculate_diversity_bin determinism...")

    bin_deterministic = True
    for program in programs:
        # Calculate the diversity bin multiple times
        bins = [
            db._calculate_diversity_bin(_sampled_diversity(db, program))
            for _ in range(3)
        ]

        # Check if all bins are the same
        if not all(b == bins[0] for b in bins):
            print(f" FAIL: Program {program.id} diversity bin not deterministic")
            print(f" Bins: {bins}")
            bin_deterministic = False

    if bin_deterministic:
        print(" PASS: Diversity binning is deterministic")

    # Summary
    print("\n" + "=" * 60)
    print("Test Summary:")
    print(f" Feature coordinates deterministic: {'PASS' if coords_deterministic else 'FAIL'}")
    print(f" Diversity calculations deterministic: {'PASS' if diversity_deterministic else 'FAIL'}")
    print(f" Diversity binning deterministic: {'PASS' if bin_deterministic else 'FAIL'}")

    all_tests_passed = (
        coords_deterministic and diversity_deterministic and bin_deterministic
    )

    if not all_tests_passed:
        print("\n❌ Some tests failed! The deterministic fixes need investigation.")
        return 1

    print("\n✅ All deterministic tests passed! The fixes are working correctly.")

    # Now run a quick test to see if this fixes the original issue
    print("\n4. Testing MAP-Elites behavior with deterministic fixes...")

    # Test basic MAP-Elites replacement
    program1 = Program(
        id="test1",
        code="def func1():\n return 1",
        metrics={"score": 0.5},
    )
    program2 = Program(
        id="test2",
        code="def func2():\n return 2",
        metrics={"score": 0.8},
    )

    # Fresh database for this test
    test_db = ProgramDatabase(config)
    test_db.add(program1)

    # Calculate coordinates
    coords1 = test_db._calculate_feature_coords(program1)
    coords2 = test_db._calculate_feature_coords(program2)

    replacement_ok = True
    if coords1 == coords2:
        print(" Programs map to same feature cell - testing replacement...")
        test_db.add(program2)

        if "test2" in test_db.programs and "test1" not in test_db.programs:
            print(" PASS: Better program correctly replaced worse program")
        else:
            print(" FAIL: Replacement didn't work as expected")
            replacement_ok = False
    else:
        print(" Programs map to different feature cells - no replacement expected")

    # A failed replacement must be reflected in the exit code; previously the
    # script reported overall success even after printing the FAIL line above.
    if not replacement_ok:
        print("\n❌ MAP-Elites replacement check failed.")
        return 1

    print("\n✅ All tests completed successfully!")
    return 0
183+
184+
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())

direct_unittest.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env python3
2+
3+
import sys
4+
import os
5+
import unittest
6+
7+
# Resolve the repository root from this script's own location instead of a
# hard-coded CI checkout path, so the script also runs outside that runner.
REPO_ROOT = os.path.dirname(os.path.abspath(__file__))

# Make the repository root importable; run_tests() imports
# `tests.test_map_elites_fix` relative to it.
sys.path.insert(0, REPO_ROOT)

# Set up the environment: run from the repository root.
os.chdir(REPO_ROOT)
12+
13+
def run_tests():
    """Run the selected MAP-Elites test cases and print a summary.

    Returns:
        True when every selected test passed. False when any test failed or
        errored, or when importing or running the tests raised an exception.
    """
    print("Running MAP-Elites tests directly...")
    print("=" * 50)

    # Names of the TestMapElitesFix cases to run, in execution order
    selected_tests = (
        'test_map_elites_replacement_basic',
        'test_map_elites_population_limit_respects_diversity',
        'test_map_elites_best_program_protection',
        'test_map_elites_feature_map_consistency',
        'test_remove_program_from_database_method',
        'test_map_elites_non_elite_program_removal_priority',
    )

    try:
        # Imported lazily so a missing test module is reported, not fatal
        from tests.test_map_elites_fix import TestMapElitesFix

        suite = unittest.TestSuite()
        for test_name in selected_tests:
            suite.addTest(TestMapElitesFix(test_name))

        result = unittest.TextTestRunner(verbosity=2).run(suite)

        # Print summary
        print("\n" + "=" * 50)
        print("Test Summary:")
        print(f"Tests run: {result.testsRun}")
        print(f"Failures: {len(result.failures)}")
        print(f"Errors: {len(result.errors)}")
        print(f"Success: {result.wasSuccessful()}")

        # Failures first, then errors; each record is (test, traceback text)
        sections = (
            ("\nFailures:", result.failures),
            ("\nErrors:", result.errors),
        )
        for heading, records in sections:
            if records:
                print(heading)
                for case, trace_text in records:
                    print(f"\n{case}:")
                    print(trace_text)

        return result.wasSuccessful()

    except ImportError as exc:
        print(f"Import error: {exc}")
        return False
    except Exception as exc:
        print(f"Error running tests: {exc}")
        import traceback
        traceback.print_exc()
        return False
67+
68+
# Script entry point: run the suite, report the verdict, and exit with
# status 0 on success or 1 on failure.
if __name__ == "__main__":
    passed = run_tests()

    if passed:
        verdict_lines = (
            "\n✅ All MAP-Elites tests passed!",
            "The deterministic fixes are working correctly.",
        )
    else:
        verdict_lines = (
            "\n❌ Some tests failed.",
            "Check the output above for details.",
        )
    for verdict_line in verdict_lines:
        print(verdict_line)

    sys.exit(int(not passed))

0 commit comments

Comments
 (0)