algorithmicsuperintelligence
diff --git a/‎openevolve/config.py‎
Lines changed: 9 additions & 7 deletions b/‎openevolve/config.py‎
Lines changed: 9 additions & 7 deletions
diff --git a/‎openevolve/controller.py‎
Lines changed: 24 additions & 26 deletions b/‎openevolve/controller.py‎
Lines changed: 24 additions & 26 deletions
diff --git a/‎openevolve/database.py‎
Lines changed: 36 additions & 15 deletions b/‎openevolve/database.py‎
Lines changed: 36 additions & 15 deletions
diff --git a/‎openevolve/iteration.py‎
Lines changed: 9 additions & 12 deletions b/‎openevolve/iteration.py‎
Lines changed: 9 additions & 12 deletions
diff --git a/‎openevolve/llm/ensemble.py‎
Lines changed: 10 additions & 4 deletions b/‎openevolve/llm/ensemble.py‎
Lines changed: 10 additions & 4 deletions
diff --git a/‎openevolve/llm/openai.py‎
Lines changed: 4 additions & 4 deletions b/‎openevolve/llm/openai.py‎
Lines changed: 4 additions & 4 deletions
@@ -32,7 +32,7 @@ class LLMModelConfig:
     timeout: int = None
     retries: int = None
     retry_delay: int = None
-    
+
     # Reproducibility
     random_seed: Optional[int] = None
 
@@ -56,10 +56,12 @@ class LLMConfig(LLMModelConfig):
     retry_delay: int = 5
 
     # n-model configuration for evolution LLM ensemble
-    models: List[LLMModelConfig] = field(default_factory=lambda: [
-        LLMModelConfig(name="gpt-4o-mini", weight=0.8),
-        LLMModelConfig(name="gpt-4o", weight=0.2)
-    ])
+    models: List[LLMModelConfig] = field(
+        default_factory=lambda: [
+            LLMModelConfig(name="gpt-4o-mini", weight=0.8),
+            LLMModelConfig(name="gpt-4o", weight=0.2),
+        ]
+    )
 
     # n-model configuration for evaluator LLM ensemble
     evaluator_models: List[LLMModelConfig] = field(default_factory=lambda: [])
@@ -264,7 +266,7 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
             config.prompt = PromptConfig(**config_dict["prompt"])
         if "database" in config_dict:
             config.database = DatabaseConfig(**config_dict["database"])
-        
+
         # Ensure database inherits the random seed if not explicitly set
         if config.database.random_seed is None and config.random_seed is not None:
             config.database.random_seed = config.random_seed
@@ -365,4 +367,4 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> Config:
     # Make the system message available to the individual models, in case it is not provided from the prompt sampler
     config.llm.update_model_params({"system_message": config.prompt.system_message})
 
-    return config
+    return config
@@ -104,20 +104,20 @@ def __init__(
             # Set global random seeds
             random.seed(self.config.random_seed)
             np.random.seed(self.config.random_seed)
-            
+
             # Create hash-based seeds for different components
-            base_seed = str(self.config.random_seed).encode('utf-8')
-            llm_seed = int(hashlib.md5(base_seed + b'llm').hexdigest()[:8], 16) % (2**31)
-            
+            base_seed = str(self.config.random_seed).encode("utf-8")
+            llm_seed = int(hashlib.md5(base_seed + b"llm").hexdigest()[:8], 16) % (2**31)
+
             # Propagate seed to LLM configurations
             self.config.llm.random_seed = llm_seed
             for model_cfg in self.config.llm.models:
-                if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None:
+                if not hasattr(model_cfg, "random_seed") or model_cfg.random_seed is None:
                     model_cfg.random_seed = llm_seed
             for model_cfg in self.config.llm.evaluator_models:
-                if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None:
+                if not hasattr(model_cfg, "random_seed") or model_cfg.random_seed is None:
                     model_cfg.random_seed = llm_seed
-            
+
             logger.info(f"Set random seed to {self.config.random_seed} for reproducibility")
             logger.debug(f"Generated LLM seed: {llm_seed}")
 
@@ -161,7 +161,7 @@ def __init__(
         self.evaluation_file = evaluation_file
 
         logger.info(f"Initialized OpenEvolve with {initial_program_path}")
-        
+
         # Initialize improved parallel processing components
         self.parallel_controller = None
 
@@ -212,7 +212,7 @@ async def run(
             Best program found
         """
         max_iterations = iterations or self.config.max_iterations
-        
+
         # Determine starting iteration
         start_iteration = 0
         if checkpoint_path and os.path.exists(checkpoint_path):
@@ -260,30 +260,31 @@ async def run(
             self.parallel_controller = ImprovedParallelController(
                 self.config, self.evaluation_file, self.database
             )
-            
+
             # Set up signal handlers for graceful shutdown
             def signal_handler(signum, frame):
                 logger.info(f"Received signal {signum}, initiating graceful shutdown...")
                 self.parallel_controller.request_shutdown()
-                
+
                 # Set up a secondary handler for immediate exit if user presses Ctrl+C again
                 def force_exit_handler(signum, frame):
                     logger.info("Force exit requested - terminating immediately")
                     import sys
+
                     sys.exit(0)
-                
+
                 signal.signal(signal.SIGINT, force_exit_handler)
-            
+
             signal.signal(signal.SIGINT, signal_handler)
             signal.signal(signal.SIGTERM, signal_handler)
-            
+
             self.parallel_controller.start()
-            
+
             # Run evolution with improved parallel processing and checkpoint callback
             await self._run_evolution_with_checkpoints(
                 start_iteration, max_iterations, target_score
             )
-            
+
         finally:
             # Clean up parallel processing resources
             if self.parallel_controller:
@@ -420,31 +421,28 @@ def _load_checkpoint(self, checkpoint_path: str) -> None:
         """Load state from a checkpoint directory"""
         if not os.path.exists(checkpoint_path):
             raise FileNotFoundError(f"Checkpoint directory {checkpoint_path} not found")
-        
+
         logger.info(f"Loading checkpoint from {checkpoint_path}")
         self.database.load(checkpoint_path)
-        logger.info(
-            f"Checkpoint loaded successfully (iteration {self.database.last_iteration})"
-        )
+        logger.info(f"Checkpoint loaded successfully (iteration {self.database.last_iteration})")
 
     async def _run_evolution_with_checkpoints(
         self, start_iteration: int, max_iterations: int, target_score: Optional[float]
     ) -> None:
         """Run evolution with checkpoint saving support"""
         logger.info(f"Using island-based evolution with {self.config.database.num_islands} islands")
         self.database.log_island_status()
-        
+
         # Run the evolution process with checkpoint callback
         await self.parallel_controller.run_evolution(
-            start_iteration, max_iterations, target_score,
-            checkpoint_callback=self._save_checkpoint
+            start_iteration, max_iterations, target_score, checkpoint_callback=self._save_checkpoint
         )
-        
+
         # Check if shutdown was requested
         if self.parallel_controller.shutdown_flag.is_set():
             logger.info("Evolution stopped due to shutdown request")
             return
-        
+
         # Save final checkpoint if needed
         final_iteration = start_iteration + max_iterations - 1
         if final_iteration > 0 and final_iteration % self.config.checkpoint_interval == 0:
@@ -499,4 +497,4 @@ def _save_best_program(self, program: Optional[Program] = None) -> None:
                 indent=2,
             )
 
-        logger.info(f"Saved best program to {code_path} with program info to {info_path}")
+        logger.info(f"Saved best program to {code_path} with program info to {info_path}")
@@ -9,6 +9,7 @@
 import random
 import time
 from dataclasses import asdict, dataclass, field, fields
+
 # FileLock removed - no longer needed with threaded parallel processing
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
@@ -198,7 +199,11 @@ def add(
         # Update archive
         self._update_archive(program)
 
-        # Update the absolute best program tracking
+        # Enforce population size limit BEFORE updating best program tracking
+        # This ensures newly added programs aren't immediately removed
+        self._enforce_population_limit(exclude_program_id=program.id)
+
+        # Update the absolute best program tracking (after population enforcement)
         self._update_best_program(program)
 
         # Save to disk if configured
@@ -207,9 +212,6 @@ def add(
 
         logger.debug(f"Added program {program.id} to island {island_idx}")
 
-        # Enforce population size limit
-        self._enforce_population_limit()
-
         return program.id
 
     def get(self, program_id: str) -> Optional[Program]:
@@ -254,9 +256,15 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]:
             return None
 
         # If no specific metric and we have a tracked best program, return it
-        if metric is None and self.best_program_id and self.best_program_id in self.programs:
-            logger.debug(f"Using tracked best program: {self.best_program_id}")
-            return self.programs[self.best_program_id]
+        if metric is None and self.best_program_id:
+            if self.best_program_id in self.programs:
+                logger.debug(f"Using tracked best program: {self.best_program_id}")
+                return self.programs[self.best_program_id]
+            else:
+                logger.warning(
+                    f"Tracked best program {self.best_program_id} no longer exists, will recalculate"
+                )
+                self.best_program_id = None
 
         if metric:
             # Sort by specific metric
@@ -713,7 +721,15 @@ def _update_best_program(self, program: Program) -> None:
             logger.debug(f"Set initial best program to {program.id}")
             return
 
-        # Compare with current best program
+        # Compare with current best program (if it still exists)
+        if self.best_program_id not in self.programs:
+            logger.warning(
+                f"Best program {self.best_program_id} no longer exists, clearing reference"
+            )
+            self.best_program_id = program.id
+            logger.info(f"Set new best program to {program.id}")
+            return
+
         current_best = self.programs[self.best_program_id]
 
         # Update if the new program is better
@@ -940,9 +956,12 @@ def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
 
         return inspirations[:n]
 
-    def _enforce_population_limit(self) -> None:
+    def _enforce_population_limit(self, exclude_program_id: Optional[str] = None) -> None:
         """
         Enforce the population size limit by removing worst programs if needed
+
+        Args:
+            exclude_program_id: Program ID to never remove (e.g., newly added program)
         """
         if len(self.programs) <= self.config.population_size:
             return
@@ -963,22 +982,24 @@ def _enforce_population_limit(self) -> None:
             key=lambda p: safe_numeric_average(p.metrics),
         )
 
-        # Remove worst programs, but never remove the best program
+        # Remove worst programs, but never remove the best program or excluded program
         programs_to_remove = []
+        protected_ids = {self.best_program_id, exclude_program_id} - {None}
+
         for program in sorted_programs:
             if len(programs_to_remove) >= num_to_remove:
                 break
-            # Don't remove the best program
-            if program.id != self.best_program_id:
+            # Don't remove the best program or excluded program
+            if program.id not in protected_ids:
                 programs_to_remove.append(program)
 
-        # If we still need to remove more and only have the best program protected,
-        # remove from the remaining programs anyway (but keep the absolute best)
+        # If we still need to remove more and only have protected programs,
+        # remove from the remaining programs anyway (but keep the protected ones)
         if len(programs_to_remove) < num_to_remove:
             remaining_programs = [
                 p
                 for p in sorted_programs
-                if p not in programs_to_remove and p.id != self.best_program_id
+                if p not in programs_to_remove and p.id not in protected_ids
             ]
             additional_removals = remaining_programs[: num_to_remove - len(programs_to_remove)]
             programs_to_remove.extend(additional_removals)
 
@@ -31,24 +31,21 @@ class Result:
     artifacts: dict = None
 
 
-
-
-
 async def run_iteration_with_shared_db(
-    iteration: int, 
-    config: Config, 
+    iteration: int,
+    config: Config,
     database: ProgramDatabase,
     evaluator: Evaluator,
     llm_ensemble: LLMEnsemble,
-    prompt_sampler: PromptSampler
+    prompt_sampler: PromptSampler,
 ):
     """
     Run a single iteration using shared memory database
-    
+
     This is optimized for use with persistent worker processes.
     """
     logger = logging.getLogger(__name__)
-    
+
     try:
         # Sample parent and inspirations from database
         parent, inspirations = database.sample()
@@ -115,10 +112,10 @@ async def run_iteration_with_shared_db(
         # Evaluate the child program
         child_id = str(uuid.uuid4())
         result.child_metrics = await evaluator.evaluate_program(child_code, child_id)
-        
+
         # Handle artifacts if they exist
         artifacts = evaluator.get_pending_artifacts(child_id)
-        
+
         # Create a child program
         result.child_program = Program(
             id=child_id,
@@ -133,13 +130,13 @@ async def run_iteration_with_shared_db(
                 "parent_metrics": parent.metrics,
             },
         )
-        
+
         result.prompt = prompt
         result.llm_response = llm_response
         result.artifacts = artifacts
         result.iteration_time = time.time() - iteration_start
         result.iteration = iteration
-        
+
         return result
 
     except Exception as e:
 
@@ -27,16 +27,22 @@ def __init__(self, models_cfg: List[LLMModelConfig]):
         self.weights = [model.weight for model in models_cfg]
         total = sum(self.weights)
         self.weights = [w / total for w in self.weights]
-        
+
         # Set up random state for deterministic model selection
         self.random_state = random.Random()
         # Initialize with seed from first model's config if available
-        if models_cfg and hasattr(models_cfg[0], 'random_seed') and models_cfg[0].random_seed is not None:
+        if (
+            models_cfg
+            and hasattr(models_cfg[0], "random_seed")
+            and models_cfg[0].random_seed is not None
+        ):
             self.random_state.seed(models_cfg[0].random_seed)
-            logger.debug(f"LLMEnsemble: Set random seed to {models_cfg[0].random_seed} for deterministic model selection")
+            logger.debug(
+                f"LLMEnsemble: Set random seed to {models_cfg[0].random_seed} for deterministic model selection"
+            )
 
         # Only log if we have multiple models or this is the first ensemble
-        if len(models_cfg) > 1 or not hasattr(logger, '_ensemble_logged'):
+        if len(models_cfg) > 1 or not hasattr(logger, "_ensemble_logged"):
             logger.info(
                 f"Initialized LLM ensemble with models: "
                 + ", ".join(
 
@@ -32,7 +32,7 @@ def __init__(
         self.retry_delay = model_cfg.retry_delay
         self.api_base = model_cfg.api_base
         self.api_key = model_cfg.api_key
-        self.random_seed = getattr(model_cfg, 'random_seed', None)
+        self.random_seed = getattr(model_cfg, "random_seed", None)
 
         # Set up API client
         self.client = openai.OpenAI(
@@ -41,9 +41,9 @@ def __init__(
         )
 
         # Only log unique models to reduce duplication
-        if not hasattr(logger, '_initialized_models'):
+        if not hasattr(logger, "_initialized_models"):
             logger._initialized_models = set()
-        
+
         if self.model not in logger._initialized_models:
             logger.info(f"Initialized OpenAI LLM with model: {self.model}")
             logger._initialized_models.add(self.model)
@@ -80,7 +80,7 @@ async def generate_with_context(
                 "top_p": kwargs.get("top_p", self.top_p),
                 "max_tokens": kwargs.get("max_tokens", self.max_tokens),
             }
-        
+
         # Add seed parameter for reproducibility if configured
         # Skip seed parameter for Google AI Studio endpoint as it doesn't support it
         seed = kwargs.get("seed", self.random_seed)