Skip to content

Commit ffcd01a

Browse files
claude[bot] and codelion committed
Implement adaptive diversity binning for MAP-Elite algorithm
- Replace fixed-range diversity binning with adaptive binning based on actual program diversity
- Use _fast_code_diversity instead of expensive calculate_edit_distance for performance
- Add comprehensive tests for both complexity and diversity binning methods
- Ensure proper cold start handling and edge case coverage

Fixes issue identified by @yyh-sjtu where fixed ranges caused all programs to cluster in the same bins.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Asankhaya Sharma <[email protected]>
1 parent a86ab90 commit ffcd01a

File tree

2 files changed

+189
-22
lines changed

2 files changed

+189
-22
lines changed

openevolve/database.py

Lines changed: 39 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -639,18 +639,18 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
639639
bin_idx = self._calculate_complexity_bin(complexity)
640640
coords.append(bin_idx)
641641
elif dim == "diversity":
642-
# Use average edit distance to other programs
643-
if len(self.programs) < 5:
642+
# Use average fast code diversity to other programs
643+
if len(self.programs) < 2:
644644
bin_idx = 0
645645
else:
646646
sample_programs = random.sample(
647647
list(self.programs.values()), min(5, len(self.programs))
648648
)
649-
avg_distance = sum(
650-
calculate_edit_distance(program.code, other.code)
649+
avg_diversity = sum(
650+
self._fast_code_diversity(program.code, other.code)
651651
for other in sample_programs
652652
) / len(sample_programs)
653-
bin_idx = self._calculate_diversity_bin(avg_distance)
653+
bin_idx = self._calculate_diversity_bin(avg_diversity)
654654
coords.append(bin_idx)
655655
elif dim == "score":
656656
# Use average of numeric metrics
@@ -717,40 +717,57 @@ def _calculate_complexity_bin(self, complexity: int) -> int:
717717

718718
return bin_idx
719719

720-
def _calculate_diversity_bin(self, avg_distance: float) -> int:
720+
def _calculate_diversity_bin(self, diversity: float) -> int:
721721
"""
722722
Calculate the bin index for a given diversity value using adaptive binning.
723723
724724
Args:
725-
avg_distance: The average edit distance to other programs
725+
diversity: The average fast code diversity to other programs
726726
727727
Returns:
728728
Bin index in range [0, self.feature_bins - 1]
729729
"""
730+
def _fast_diversity(program, sample_programs):
731+
"""Calculate average fast diversity for a program against sample programs"""
732+
avg_diversity = sum(
733+
self._fast_code_diversity(program.code, other.code)
734+
for other in sample_programs
735+
) / len(sample_programs)
736+
return avg_diversity
737+
730738
if len(self.programs) < 2:
731739
# Cold start: use fixed range binning
732-
# Assume reasonable range of 0-10000 for edit distance
733-
max_distance = 10000
734-
min_distance = 0
740+
# Assume reasonable range of 0-10000 for fast diversity
741+
max_diversity = 10000
742+
min_diversity = 0
735743
else:
736-
# For diversity, we could calculate the actual range, but edit distance
737-
# computation is expensive. Use a reasonable fixed range instead.
738-
# Edit distances typically range from 0 to several thousand
739-
max_distance = 5000
740-
min_distance = 0
741-
742-
# Normalize distance to [0, 1] range
743-
if max_distance > min_distance:
744-
normalized = (avg_distance - min_distance) / (max_distance - min_distance)
744+
# Sample programs for calculating diversity range (limit to 5 for performance)
745+
sample_programs = list(self.programs.values())
746+
if len(sample_programs) > 5:
747+
import random
748+
sample_programs = random.sample(sample_programs, 5)
749+
750+
# Adaptive binning: use actual range from existing programs
751+
existing_diversities = [_fast_diversity(p, sample_programs) for p in self.programs.values()]
752+
min_diversity = min(existing_diversities)
753+
max_diversity = max(existing_diversities)
754+
755+
# Ensure range is not zero
756+
if max_diversity == min_diversity:
757+
max_diversity = min_diversity + 1
758+
759+
# Normalize diversity to [0, 1] range
760+
if max_diversity > min_diversity:
761+
normalized = (diversity - min_diversity) / (max_diversity - min_diversity)
745762
else:
746763
normalized = 0.0
747-
764+
748765
# Clamp to [0, 1] range
749766
normalized = max(0.0, min(1.0, normalized))
750-
767+
751768
# Convert to bin index
752769
bin_idx = int(normalized * self.feature_bins)
753-
770+
754771
# Ensure bin index is within valid range
755772
bin_idx = max(0, min(self.feature_bins - 1, bin_idx))
756773

tests/test_database.py

Lines changed: 150 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -268,6 +268,156 @@ def test_population_limit_enforcement(self):
268268
# Restore original limit
269269
self.db.config.population_size = original_limit
270270

271+
def test_calculate_complexity_bin_adaptive(self):
272+
"""Test adaptive complexity binning with multiple programs"""
273+
# Add programs with different complexities
274+
programs = [
275+
Program(id="short", code="x=1", metrics={"score": 0.5}),
276+
Program(id="medium", code="def func():\n return x*2\n pass", metrics={"score": 0.5}),
277+
Program(id="long", code="def complex_function():\n result = []\n for i in range(100):\n result.append(i*2)\n return result", metrics={"score": 0.5}),
278+
]
279+
280+
for program in programs:
281+
self.db.add(program)
282+
283+
# Test binning for different complexity values
284+
short_bin = self.db._calculate_complexity_bin(len("x=1"))
285+
medium_bin = self.db._calculate_complexity_bin(len("def func():\n return x*2\n pass"))
286+
long_bin = self.db._calculate_complexity_bin(len("def complex_function():\n result = []\n for i in range(100):\n result.append(i*2)\n return result"))
287+
288+
# Bins should be different and within valid range
289+
self.assertNotEqual(short_bin, long_bin)
290+
self.assertGreaterEqual(short_bin, 0)
291+
self.assertLess(short_bin, self.db.feature_bins)
292+
self.assertGreaterEqual(long_bin, 0)
293+
self.assertLess(long_bin, self.db.feature_bins)
294+
295+
def test_calculate_complexity_bin_cold_start(self):
296+
"""Test complexity binning during cold start (< 2 programs)"""
297+
# Empty database - should use fixed range
298+
bin_idx = self.db._calculate_complexity_bin(500)
299+
300+
self.assertGreaterEqual(bin_idx, 0)
301+
self.assertLess(bin_idx, self.db.feature_bins)
302+
303+
# Add one program - still cold start
304+
program = Program(id="single", code="x=1", metrics={"score": 0.5})
305+
self.db.add(program)
306+
307+
bin_idx = self.db._calculate_complexity_bin(500)
308+
self.assertGreaterEqual(bin_idx, 0)
309+
self.assertLess(bin_idx, self.db.feature_bins)
310+
311+
def test_calculate_diversity_bin_adaptive(self):
312+
"""Test adaptive diversity binning with multiple programs"""
313+
# Add programs with different code structures for diversity testing
314+
programs = [
315+
Program(id="simple", code="x = 1", metrics={"score": 0.5}),
316+
Program(id="function", code="def add(a, b):\n return a + b", metrics={"score": 0.5}),
317+
Program(id="loop", code="for i in range(10):\n print(i)\n x += i", metrics={"score": 0.5}),
318+
Program(id="complex", code="class MyClass:\n def __init__(self):\n self.data = []\n def process(self, items):\n return [x*2 for x in items]", metrics={"score": 0.5}),
319+
]
320+
321+
for program in programs:
322+
self.db.add(program)
323+
324+
# Test binning for different diversity values
325+
# Use fast diversity to calculate test values
326+
simple_prog = programs[0]
327+
complex_prog = programs[3]
328+
329+
# Calculate diversity for simple vs complex programs
330+
simple_diversity = self.db._fast_code_diversity(simple_prog.code, complex_prog.code)
331+
332+
# Test the binning
333+
bin_idx = self.db._calculate_diversity_bin(simple_diversity)
334+
335+
# Should be within valid range
336+
self.assertGreaterEqual(bin_idx, 0)
337+
self.assertLess(bin_idx, self.db.feature_bins)
338+
339+
def test_calculate_diversity_bin_cold_start(self):
340+
"""Test diversity binning during cold start (< 2 programs)"""
341+
# Empty database - should use fixed range
342+
bin_idx = self.db._calculate_diversity_bin(500.0)
343+
344+
self.assertGreaterEqual(bin_idx, 0)
345+
self.assertLess(bin_idx, self.db.feature_bins)
346+
347+
# Add one program - still cold start
348+
program = Program(id="single", code="x=1", metrics={"score": 0.5})
349+
self.db.add(program)
350+
351+
bin_idx = self.db._calculate_diversity_bin(500.0)
352+
self.assertGreaterEqual(bin_idx, 0)
353+
self.assertLess(bin_idx, self.db.feature_bins)
354+
355+
def test_calculate_diversity_bin_identical_programs(self):
356+
"""Test diversity binning when all programs have identical diversity"""
357+
# Add multiple identical programs
358+
for i in range(3):
359+
program = Program(
360+
id=f"identical_{i}",
361+
code="x = 1", # Same code
362+
metrics={"score": 0.5}
363+
)
364+
self.db.add(program)
365+
366+
# Test binning - should handle zero range gracefully
367+
bin_idx = self.db._calculate_diversity_bin(0.0)
368+
369+
self.assertGreaterEqual(bin_idx, 0)
370+
self.assertLess(bin_idx, self.db.feature_bins)
371+
372+
def test_fast_code_diversity_function(self):
373+
"""Test the _fast_code_diversity function"""
374+
# Test identical code
375+
code1 = "def test(): pass"
376+
code2 = "def test(): pass"
377+
diversity = self.db._fast_code_diversity(code1, code2)
378+
self.assertEqual(diversity, 0.0)
379+
380+
# Test different code
381+
code1 = "x = 1"
382+
code2 = "def complex_function():\n return [i*2 for i in range(100)]"
383+
diversity = self.db._fast_code_diversity(code1, code2)
384+
self.assertGreater(diversity, 0.0)
385+
386+
# Test length difference
387+
short_code = "x = 1"
388+
long_code = "x = 1" + "a" * 100
389+
diversity = self.db._fast_code_diversity(short_code, long_code)
390+
self.assertGreater(diversity, 0.0)
391+
392+
def test_diversity_feature_integration(self):
393+
"""Test diversity feature calculation in feature coordinates"""
394+
# Add programs with different structures
395+
programs = [
396+
Program(id="prog1", code="x = 1", metrics={"score": 0.5}),
397+
Program(id="prog2", code="def func():\n return 2", metrics={"score": 0.5}),
398+
Program(id="prog3", code="for i in range(5):\n print(i)", metrics={"score": 0.5}),
399+
]
400+
401+
for program in programs:
402+
self.db.add(program)
403+
404+
# Create a test program with diversity feature enabled
405+
test_config = self.db.config
406+
test_config.feature_dimensions = ["score", "complexity", "diversity"]
407+
408+
test_program = Program(id="test", code="def test(): return 42", metrics={"score": 0.7})
409+
410+
# Calculate feature coordinates - should include diversity dimension
411+
coords = self.db._calculate_feature_coords(test_program)
412+
413+
# Should have 3 coordinates for score, complexity, and diversity
414+
self.assertEqual(len(coords), 3)
415+
416+
# All coordinates should be within valid range
417+
for coord in coords:
418+
self.assertGreaterEqual(coord, 0)
419+
self.assertLess(coord, self.db.feature_bins)
420+
271421

272422
if __name__ == "__main__":
273423
unittest.main()

0 commit comments

Comments
 (0)