1+ #!/usr/bin/env python3
2+
3+ import sys
4+ import os
5+ sys .path .insert (0 , os .path .abspath ('.' ))
6+
7+ # Direct test of the deterministic fixes
8+ from openevolve .config import DatabaseConfig
9+ from openevolve .database import Program , ProgramDatabase
10+
def _mean_sample_diversity(db, program, sample_size=5):
    """Return the mean diversity of *program* against a fixed sample of *db*.

    The sample is the first ``sample_size`` entries of ``db.programs`` ordered
    by program id, so repeated calls compare against the same programs. The
    pairwise value comes from ``db._fast_code_diversity``. Returns 0 when the
    database holds fewer than two programs.
    """
    if len(db.programs) < 2:
        return 0
    ordered = sorted(db.programs.values(), key=lambda p: p.id)
    sample = ordered[:sample_size]
    total = sum(
        db._fast_code_diversity(program.code, other.code) for other in sample
    )
    return total / len(sample)


def _all_equal(values):
    """Return True when every item of *values* equals the first one."""
    return all(value == values[0] for value in values)


def main():
    """Run the determinism checks against a ProgramDatabase and report results.

    Three checks repeat a computation three times per program and require
    identical results: feature coordinates, the sample diversity value, and
    the diversity bin. If they all pass, a fourth check exercises cell
    replacement on a fresh database.

    Returns:
        0 when every check that ran passed, 1 otherwise. A failed replacement
        check also yields 1, so the process exit status matches the printed
        PASS/FAIL lines.
    """
    print("Testing deterministic behavior of MAP-Elites fixes...")

    # Create test configuration
    config = DatabaseConfig(
        population_size=10,
        archive_size=5,
        num_islands=2,
        feature_dimensions=["complexity", "diversity"],
        feature_bins=3,
        exploration_ratio=0.3,
        exploitation_ratio=0.4,
        elite_selection_ratio=0.2,
        db_path=None,
        random_seed=42,
    )

    # Create program database
    db = ProgramDatabase(config)

    # Test 1: Check that _calculate_feature_coords is deterministic
    print("\n 1. Testing _calculate_feature_coords determinism...")

    # Create test programs whose code length grows with the index
    programs = []
    for i in range(8):
        program = Program(
            id=f"prog{i} ",
            code=f"def func{i} ():\n return {'x' * (i * 200)} ",
            metrics={"score": 0.2 + (i * 0.1)},
        )
        programs.append(program)
        db.add(program)

    # Three full passes over all programs (pass-major order, as before)
    coords_runs = [
        [db._calculate_feature_coords(program) for program in programs]
        for _ in range(3)
    ]

    # Regroup per program and compare the three passes
    coords_deterministic = True
    for program, (run1, run2, run3) in zip(programs, zip(*coords_runs)):
        if run1 != run2 or run1 != run3:
            print(f" FAIL: Program {program.id} coordinates not deterministic")
            print(f" Run 1: {run1} ")
            print(f" Run 2: {run2} ")
            print(f" Run 3: {run3} ")
            coords_deterministic = False

    if coords_deterministic:
        print(" PASS: Feature coordinates are deterministic")

    # Test 2: Check that diversity calculation is deterministic
    print("\n 2. Testing diversity calculation determinism...")

    diversity_deterministic = True
    for program in programs:
        diversities = [_mean_sample_diversity(db, program) for _ in range(3)]
        if not _all_equal(diversities):
            print(f" FAIL: Program {program.id} diversity not deterministic")
            print(f" Diversities: {diversities} ")
            diversity_deterministic = False

    if diversity_deterministic:
        print(" PASS: Diversity calculations are deterministic")

    # Test 3: Check that _calculate_diversity_bin is deterministic
    print("\n 3. Testing _calculate_diversity_bin determinism...")

    bin_deterministic = True
    for program in programs:
        bins = [
            db._calculate_diversity_bin(_mean_sample_diversity(db, program))
            for _ in range(3)
        ]
        if not _all_equal(bins):
            print(f" FAIL: Program {program.id} diversity bin not deterministic")
            print(f" Bins: {bins} ")
            bin_deterministic = False

    if bin_deterministic:
        print(" PASS: Diversity binning is deterministic")

    # Summary
    print("\n " + "=" * 60)
    print("Test Summary:")
    print(f" Feature coordinates deterministic: {'PASS' if coords_deterministic else 'FAIL'} ")
    print(f" Diversity calculations deterministic: {'PASS' if diversity_deterministic else 'FAIL'} ")
    print(f" Diversity binning deterministic: {'PASS' if bin_deterministic else 'FAIL'} ")

    all_tests_passed = (
        coords_deterministic and diversity_deterministic and bin_deterministic
    )

    if not all_tests_passed:
        print("\n ❌ Some tests failed! The deterministic fixes need investigation.")
        return 1

    print("\n ✅ All deterministic tests passed! The fixes are working correctly.")

    # Now run a quick test to see if this fixes the original issue
    print("\n 4. Testing MAP-Elites behavior with deterministic fixes...")

    # Two programs that differ in score; test2 has the higher one
    program1 = Program(
        id="test1",
        code="def func1():\n return 1",
        metrics={"score": 0.5},
    )
    program2 = Program(
        id="test2",
        code="def func2():\n return 2",
        metrics={"score": 0.8},
    )

    # Fresh database for this test
    test_db = ProgramDatabase(config)
    test_db.add(program1)

    # Calculate coordinates
    coords1 = test_db._calculate_feature_coords(program1)
    coords2 = test_db._calculate_feature_coords(program2)

    # Stays True when the programs land in different cells (nothing to check)
    replacement_ok = True
    if coords1 == coords2:
        print(" Programs map to same feature cell - testing replacement...")
        test_db.add(program2)

        replacement_ok = (
            "test2" in test_db.programs and "test1" not in test_db.programs
        )
        if replacement_ok:
            print(" PASS: Better program correctly replaced worse program")
        else:
            print(" FAIL: Replacement didn't work as expected")
    else:
        print(" Programs map to different feature cells - no replacement expected")

    if not replacement_ok:
        # Previously this case still reported success and returned 0.
        print("\n ❌ MAP-Elites replacement test failed!")
        return 1

    print("\n ✅ All tests completed successfully!")
    return 0
183+
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    raise SystemExit(main())
0 commit comments