66import json
77import logging
88import os
9- import shutil
109import random
1110import time
1211from dataclasses import asdict , dataclass , field , fields
13- from pathlib import Path
14- from Levenshtein import ratio
1512from filelock import FileLock , Timeout
1613from typing import Any , Dict , List , Optional , Set , Tuple , Union
1714
@@ -358,9 +355,6 @@ def save(self, path: Optional[str] = None, iteration: int = 0) -> None:
358355 lock_path = os .path .join ("tmp/locks" , lock_name )
359356 try :
360357 with FileLock (lock_path , timeout = 10 ):
361- # Create directory and remove old path if it exists
362- # if os.path.exists(save_path):
363- # shutil.rmtree(save_path)
364358 # create directory if it doesn't exist
365359 os .makedirs (save_path , exist_ok = True )
366360
@@ -590,7 +584,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
590584 if dim == "complexity" :
591585 # Use code length as complexity measure
592586 complexity = len (program .code )
593- bin_idx = min (int (complexity / 1000 ), self .feature_bins - 1 )
587+ bin_idx = min (int (complexity / 1000 * self . feature_bins ), self .feature_bins - 1 )
594588 coords .append (bin_idx )
595589 elif dim == "diversity" :
596590 # Use average edit distance to other programs
@@ -600,18 +594,21 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
600594 sample_programs = random .sample (
601595 list (self .programs .values ()), min (5 , len (self .programs ))
602596 )
603- avg_distance_ratio = sum (
604- 1 - calculate_edit_distance (program .code , other .code ) for other in sample_programs
597+ avg_distance = sum (
598+ calculate_edit_distance (program .code , other .code )
599+ for other in sample_programs
605600 ) / len (sample_programs )
606- bin_idx = min (int (avg_distance_ratio * 20 ), self .feature_bins - 1 )
601+ bin_idx = min (
602+ int (avg_distance / 1000 * self .feature_bins ), self .feature_bins - 1
603+ )
607604 coords .append (bin_idx )
608605 elif dim == "score" :
609606 # Use average of numeric metrics
610607 if not program .metrics :
611608 bin_idx = 0
612609 else :
613610 avg_score = safe_numeric_average (program .metrics )
614- bin_idx = max ( 0 , min (int (avg_score * self .feature_bins ), self .feature_bins - 1 ) )
611+ bin_idx = min (int (avg_score * self .feature_bins ), self .feature_bins - 1 )
615612 coords .append (bin_idx )
616613 elif dim in program .metrics :
617614 # Use specific metric
@@ -960,10 +957,7 @@ def _enforce_population_limit(self) -> None:
960957 """
961958 Enforce the population size limit by removing worst programs if needed
962959 """
963- if (
964- len (self .programs )
965- <= self .config .population_size + self .config .allowed_population_overflow
966- ):
960+ if len (self .programs ) <= self .config .population_size :
967961 return
968962
969963 # Calculate how many programs to remove
@@ -1148,7 +1142,7 @@ def _calculate_island_diversity(self, programs: List[Program]) -> float:
11481142 if len (programs ) < 2 :
11491143 return 0.0
11501144
1151- total_diversity_ratio = 0
1145+ total_diversity = 0
11521146 comparisons = 0
11531147
11541148 # Use deterministic sampling instead of random.sample() to ensure consistent results
@@ -1165,12 +1159,46 @@ def _calculate_island_diversity(self, programs: List[Program]) -> float:
11651159
11661160 for i , prog1 in enumerate (sample_programs ):
11671161 for prog2 in sample_programs [i + 1 :]:
1168- total_diversity_ratio += 1 - ratio (
1169- prog1 .code , prog2 .code
1170- ) # ratio measures similarity
1162+ if comparisons >= max_comparisons :
1163+ break
1164+
1165+ # Use fast approximation instead of expensive edit distance
1166+ diversity = self ._fast_code_diversity (prog1 .code , prog2 .code )
1167+ total_diversity += diversity
11711168 comparisons += 1
11721169
1173- return total_diversity_ratio / max (1 , comparisons )
1170+ if comparisons >= max_comparisons :
1171+ break
1172+
1173+ return total_diversity / max (1 , comparisons )
1174+
1175+ def _fast_code_diversity (self , code1 : str , code2 : str ) -> float :
1176+ """
1177+ Fast approximation of code diversity using simple metrics
1178+
1179+ Returns diversity score (higher = more diverse)
1180+ """
1181+ if code1 == code2 :
1182+ return 0.0
1183+
1184+ # Length difference (scaled to reasonable range)
1185+ len1 , len2 = len (code1 ), len (code2 )
1186+ length_diff = abs (len1 - len2 )
1187+
1188+ # Line count difference
1189+ lines1 = code1 .count ("\n " )
1190+ lines2 = code2 .count ("\n " )
1191+ line_diff = abs (lines1 - lines2 )
1192+
1193+ # Simple character set difference
1194+ chars1 = set (code1 )
1195+ chars2 = set (code2 )
1196+ char_diff = len (chars1 .symmetric_difference (chars2 ))
1197+
1198+ # Combine metrics (scaled to match original edit distance range)
1199+ diversity = length_diff * 0.1 + line_diff * 10 + char_diff * 0.5
1200+
1201+ return diversity
11741202
11751203 def log_island_status (self ) -> None :
11761204 """Log current status of all islands"""
0 commit comments