
Commit 5782888: more config fixes
1 parent 33ccfc1

6 files changed (+237, -72 lines)

configs/default_config.yaml

Lines changed: 4 additions & 8 deletions
@@ -55,9 +55,7 @@ prompt:
     - "I suggest the following improvements:"
     - "We can enhance this code by:"
 
-  # Meta-prompting (experimental)
-  use_meta_prompting: false  # Use LLM to generate parts of the prompt
-  meta_prompt_weight: 0.1  # Weight for meta-prompting influence
+  # Note: meta-prompting features are not yet implemented
 
 # Database configuration
 database:
@@ -80,7 +78,7 @@ database:
   elite_selection_ratio: 0.1  # Ratio of elite programs to select
   exploration_ratio: 0.2  # Ratio of exploration vs exploitation
   exploitation_ratio: 0.7  # Ratio of exploitation vs random selection
-  diversity_metric: "edit_distance"  # Diversity metric (edit_distance, feature_based)
+  # Note: diversity_metric is fixed to "edit_distance" (feature_based not implemented)
 
   # Feature map dimensions for MAP-Elites
   feature_dimensions:  # Dimensions for MAP-Elites feature map
@@ -94,9 +92,7 @@ evaluator:
   timeout: 300  # Maximum evaluation time in seconds
   max_retries: 3  # Maximum number of retries for evaluation
 
-  # Resource limits
-  memory_limit_mb: null  # Memory limit for evaluation (null = no limit)
-  cpu_limit: null  # CPU limit for evaluation (null = no limit)
+  # Note: resource limits (memory_limit_mb, cpu_limit) are not yet implemented
 
   # Evaluation strategies
   cascade_evaluation: true  # Use cascade evaluation to filter bad solutions early
@@ -107,7 +103,7 @@ evaluator:
 
   # Parallel evaluation
   parallel_evaluations: 4  # Number of parallel evaluations
-  distributed: false  # Use distributed evaluation
+  # Note: distributed evaluation is not yet implemented
 
   # LLM-based feedback (experimental)
   use_llm_feedback: false  # Use LLM to evaluate code quality
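Taken together, these edits remove or annotate config keys that currently have no effect (use_meta_prompting, meta_prompt_weight, memory_limit_mb, cpu_limit, distributed) and pin diversity_metric to "edit_distance". A small standalone check, not part of the repo and using only key names taken from the notes above, that flags such keys if they linger in an existing config file:

import yaml
from typing import List

# Keys that this commit comments out of the default config or marks as fixed/not implemented
UNIMPLEMENTED_KEYS = {
    "prompt": ["use_meta_prompting", "meta_prompt_weight"],
    "database": ["diversity_metric"],
    "evaluator": ["memory_limit_mb", "cpu_limit", "distributed"],
}

def find_stale_keys(path: str) -> List[str]:
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    stale = []
    for section, keys in UNIMPLEMENTED_KEYS.items():
        section_cfg = cfg.get(section) or {}
        for key in keys:
            if key in section_cfg:
                stale.append(f"{section}.{key}")
    return stale

if __name__ == "__main__":
    # Expected to print [] for the default config after this commit
    print(find_stale_keys("configs/default_config.yaml"))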

configs/island_config_example.yaml

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ database:
   elite_selection_ratio: 0.1
   exploration_ratio: 0.3
   exploitation_ratio: 0.7
+  # Note: diversity_metric fixed to "edit_distance"
 
   # Feature map dimensions for MAP-Elites
   feature_dimensions: ["score", "complexity"]

openevolve/config.py

Lines changed: 10 additions & 6 deletions
@@ -195,8 +195,9 @@ def to_dict(self) -> Dict[str, Any]:
                 "num_diverse_programs": self.prompt.num_diverse_programs,
                 "use_template_stochasticity": self.prompt.use_template_stochasticity,
                 "template_variations": self.prompt.template_variations,
-                "use_meta_prompting": self.prompt.use_meta_prompting,
-                "meta_prompt_weight": self.prompt.meta_prompt_weight,
+                # Note: meta-prompting features not implemented
+                # "use_meta_prompting": self.prompt.use_meta_prompting,
+                # "meta_prompt_weight": self.prompt.meta_prompt_weight,
             },
             "database": {
                 "db_path": self.database.db_path,
@@ -207,7 +208,8 @@ def to_dict(self) -> Dict[str, Any]:
                 "elite_selection_ratio": self.database.elite_selection_ratio,
                 "exploration_ratio": self.database.exploration_ratio,
                 "exploitation_ratio": self.database.exploitation_ratio,
-                "diversity_metric": self.database.diversity_metric,
+                # Note: diversity_metric fixed to "edit_distance"
+                # "diversity_metric": self.database.diversity_metric,
                 "feature_dimensions": self.database.feature_dimensions,
                 "feature_bins": self.database.feature_bins,
                 "migration_interval": self.database.migration_interval,
@@ -217,12 +219,14 @@ def to_dict(self) -> Dict[str, Any]:
             "evaluator": {
                 "timeout": self.evaluator.timeout,
                 "max_retries": self.evaluator.max_retries,
-                "memory_limit_mb": self.evaluator.memory_limit_mb,
-                "cpu_limit": self.evaluator.cpu_limit,
+                # Note: resource limits not implemented
+                # "memory_limit_mb": self.evaluator.memory_limit_mb,
+                # "cpu_limit": self.evaluator.cpu_limit,
                 "cascade_evaluation": self.evaluator.cascade_evaluation,
                 "cascade_thresholds": self.evaluator.cascade_thresholds,
                 "parallel_evaluations": self.evaluator.parallel_evaluations,
-                "distributed": self.evaluator.distributed,
+                # Note: distributed evaluation not implemented
+                # "distributed": self.evaluator.distributed,
                 "use_llm_feedback": self.evaluator.use_llm_feedback,
                 "llm_feedback_weight": self.evaluator.llm_feedback_weight,
             },
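Because the unimplemented fields are now commented out of to_dict(), a config serialized from that dict no longer carries them. A hedged usage sketch, assuming a loaded Config instance named config with the to_dict() method shown above; dump_config is a hypothetical helper, not a repo API:

import yaml

def dump_config(config, path: str) -> None:
    # Serialize the config via its to_dict() representation
    with open(path, "w") as f:
        yaml.safe_dump(config.to_dict(), f, default_flow_style=False)

# dump_config(config, "my_run_config.yaml")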

openevolve/database.py

Lines changed: 124 additions & 21 deletions
@@ -130,6 +130,9 @@ def add(
 
         self.programs[program.id] = program
 
+        # Enforce population size limit
+        self._enforce_population_limit()
+
         # Calculate feature coordinates for MAP-Elites
         feature_coords = self._calculate_feature_coords(program)
 
@@ -552,25 +555,23 @@ def _sample_parent(self) -> Program:
         Returns:
             Parent program from current island
         """
-        # Decide between exploitation and exploration
-        if random.random() < self.config.exploitation_ratio and self.archive:
-            # Even for exploitation, prefer programs from current island
-            archive_programs_in_island = [
-                pid
-                for pid in self.archive
-                if pid in self.programs
-                and self.programs[pid].metadata.get("island") == self.current_island
-            ]
-
-            if archive_programs_in_island:
-                parent_id = random.choice(archive_programs_in_island)
-                return self.programs[parent_id]
-            else:
-                # Fall back to any archive program if current island has none
-                parent_id = random.choice(list(self.archive))
-                return self.programs[parent_id]
+        # Use exploration_ratio and exploitation_ratio to decide sampling strategy
+        rand_val = random.random()
+
+        if rand_val < self.config.exploration_ratio:
+            # EXPLORATION: Sample from current island (diverse sampling)
+            return self._sample_exploration_parent()
+        elif rand_val < self.config.exploration_ratio + self.config.exploitation_ratio:
+            # EXPLOITATION: Sample from archive (elite programs)
+            return self._sample_exploitation_parent()
+        else:
+            # RANDOM: Sample from any program (remaining probability)
+            return self._sample_random_parent()
 
-        # Exploration: Sample from current island only
+    def _sample_exploration_parent(self) -> Program:
+        """
+        Sample a parent for exploration (from current island)
+        """
         current_island_programs = self.islands[self.current_island]
 
         if not current_island_programs:
@@ -589,6 +590,41 @@ def _sample_parent(self) -> Program:
         # Sample from current island
         parent_id = random.choice(list(current_island_programs))
         return self.programs[parent_id]
+
+    def _sample_exploitation_parent(self) -> Program:
+        """
+        Sample a parent for exploitation (from archive/elite programs)
+        """
+        if not self.archive:
+            # Fallback to exploration if no archive
+            return self._sample_exploration_parent()
+
+        # Prefer programs from current island in archive
+        archive_programs_in_island = [
+            pid
+            for pid in self.archive
+            if pid in self.programs
+            and self.programs[pid].metadata.get("island") == self.current_island
+        ]
+
+        if archive_programs_in_island:
+            parent_id = random.choice(archive_programs_in_island)
+            return self.programs[parent_id]
+        else:
+            # Fall back to any archive program if current island has none
+            parent_id = random.choice(list(self.archive))
+            return self.programs[parent_id]
+
+    def _sample_random_parent(self) -> Program:
+        """
+        Sample a completely random parent from all programs
+        """
+        if not self.programs:
+            raise ValueError("No programs available for sampling")
+
+        # Sample randomly from all programs
+        program_id = random.choice(list(self.programs.keys()))
+        return self.programs[program_id]
 
     def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
         """
@@ -616,14 +652,17 @@ def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
             if program.id not in [p.id for p in inspirations] and program.id != parent.id:
                 inspirations.append(program)
 
-        # Add diverse programs
+        # Add diverse programs using config.num_diverse_programs
         if len(self.programs) > n and len(inspirations) < n:
-            # Sample from different feature cells
+            # Calculate how many diverse programs to add (up to remaining slots)
+            remaining_slots = n - len(inspirations)
+
+            # Sample from different feature cells for diversity
            feature_coords = self._calculate_feature_coords(parent)
 
             # Get programs from nearby feature cells
             nearby_programs = []
-            for _ in range(n - len(inspirations)):
+            for _ in range(remaining_slots):
                 # Perturb coordinates
                 perturbed_coords = [
                     max(0, min(self.feature_bins - 1, c + random.randint(-1, 1)))
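The diverse-inspiration step looks for neighbours of the parent's MAP-Elites cell by nudging each feature coordinate by at most one bin. A small sketch of that perturbation with hypothetical names, assuming integer cell coordinates clamped to [0, feature_bins - 1]:

import random
from typing import List

def perturb_coords(coords: List[int], feature_bins: int) -> List[int]:
    # Shift each dimension by -1, 0, or +1 and clamp into the grid
    return [max(0, min(feature_bins - 1, c + random.randint(-1, 1))) for c in coords]

print(perturb_coords([0, 5, 9], feature_bins=10))  # e.g. [0, 4, 9]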
@@ -657,6 +696,70 @@ def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
 
         return inspirations[:n]
 
+    def _enforce_population_limit(self) -> None:
+        """
+        Enforce the population size limit by removing worst programs if needed
+        """
+        if len(self.programs) <= self.config.population_size:
+            return
+
+        # Calculate how many programs to remove
+        num_to_remove = len(self.programs) - self.config.population_size
+
+        logger.info(f"Population size ({len(self.programs)}) exceeds limit ({self.config.population_size}), removing {num_to_remove} programs")
+
+        # Get programs sorted by fitness (worst first)
+        all_programs = list(self.programs.values())
+
+        # Sort by average metric (worst first)
+        sorted_programs = sorted(
+            all_programs,
+            key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) if p.metrics else 0.0
+        )
+
+        # Remove worst programs, but never remove the best program
+        programs_to_remove = []
+        for program in sorted_programs:
+            if len(programs_to_remove) >= num_to_remove:
+                break
+            # Don't remove the best program
+            if program.id != self.best_program_id:
+                programs_to_remove.append(program)
+
+        # If we still need to remove more and only have the best program protected,
+        # remove from the remaining programs anyway (but keep the absolute best)
+        if len(programs_to_remove) < num_to_remove:
+            remaining_programs = [p for p in sorted_programs if p not in programs_to_remove and p.id != self.best_program_id]
+            additional_removals = remaining_programs[:num_to_remove - len(programs_to_remove)]
+            programs_to_remove.extend(additional_removals)
+
+        # Remove the selected programs
+        for program in programs_to_remove:
+            program_id = program.id
+
+            # Remove from main programs dict
+            if program_id in self.programs:
+                del self.programs[program_id]
+
+            # Remove from feature map
+            keys_to_remove = []
+            for key, pid in self.feature_map.items():
+                if pid == program_id:
+                    keys_to_remove.append(key)
+            for key in keys_to_remove:
+                del self.feature_map[key]
+
+            # Remove from islands
+            for island in self.islands:
+                island.discard(program_id)
+
+            # Remove from archive
+            self.archive.discard(program_id)
+
+            logger.debug(f"Removed program {program_id} due to population limit")
+
+        logger.info(f"Population size after cleanup: {len(self.programs)}")
+
     # Island management methods
     def set_current_island(self, island_idx: int) -> None:
         """Set which island is currently being evolved"""

openevolve/evaluator.py

Lines changed: 47 additions & 36 deletions
@@ -89,46 +89,57 @@ async def evaluate_program(
             Dictionary of metric name to score
         """
         start_time = time.time()
+        program_id_str = f" {program_id}" if program_id else ""
+
+        # Retry logic for evaluation
+        last_exception = None
+        for attempt in range(self.config.max_retries + 1):
+            # Create a temporary file for the program
+            with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
+                temp_file.write(program_code.encode("utf-8"))
+                temp_file_path = temp_file.name
 
-        # Create a temporary file for the program
-        with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
-            temp_file.write(program_code.encode("utf-8"))
-            temp_file_path = temp_file.name
+            try:
+                # Run evaluation
+                if self.config.cascade_evaluation:
+                    # Run cascade evaluation
+                    metrics = await self._cascade_evaluate(temp_file_path)
+                else:
+                    # Run direct evaluation
+                    metrics = await self._direct_evaluate(temp_file_path)
 
-        try:
-            # Run evaluation
-            if self.config.cascade_evaluation:
-                # Run cascade evaluation
-                metrics = await self._cascade_evaluate(temp_file_path)
-            else:
-                # Run direct evaluation
-                metrics = await self._direct_evaluate(temp_file_path)
-
-            # Add LLM feedback if configured
-            if self.config.use_llm_feedback and self.llm_ensemble:
-                feedback_metrics = await self._llm_evaluate(program_code)
-
-                # Combine metrics
-                for name, value in feedback_metrics.items():
-                    metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
-
-            elapsed = time.time() - start_time
-            program_id_str = f" {program_id}" if program_id else ""
-            logger.info(
-                f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
-                f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
-            )
-
-            return metrics
+                # Add LLM feedback if configured
+                if self.config.use_llm_feedback and self.llm_ensemble:
+                    feedback_metrics = await self._llm_evaluate(program_code)
 
-        except Exception as e:
-            logger.error(f"Error evaluating program: {str(e)}")
-            return {"error": 0.0}
+                    # Combine metrics
+                    for name, value in feedback_metrics.items():
+                        metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
 
-        finally:
-            # Clean up temporary file
-            if os.path.exists(temp_file_path):
-                os.unlink(temp_file_path)
+                elapsed = time.time() - start_time
+                logger.info(
+                    f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
+                    f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
+                )
+
+                return metrics
+
+            except Exception as e:
+                last_exception = e
+                logger.warning(f"Evaluation attempt {attempt + 1}/{self.config.max_retries + 1} failed for program{program_id_str}: {str(e)}")
+
+                # If this is not the last attempt, wait a bit before retrying
+                if attempt < self.config.max_retries:
+                    await asyncio.sleep(1.0)  # Wait 1 second before retry
+
+            finally:
+                # Clean up temporary file
+                if os.path.exists(temp_file_path):
+                    os.unlink(temp_file_path)
+
+        # All retries failed
+        logger.error(f"All evaluation attempts failed for program{program_id_str}. Last error: {str(last_exception)}")
+        return {"error": 0.0}
 
     @run_in_executor
     def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
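evaluate_program now wraps the whole evaluation in a retry loop: up to max_retries + 1 attempts, a warning per failed attempt, a fixed one-second pause between attempts, and {"error": 0.0} only after every attempt fails. The same pattern, distilled into a generic async helper (a sketch, not part of the commit; retry_async is a hypothetical name):

import asyncio
import logging

logger = logging.getLogger(__name__)

async def retry_async(func, max_retries: int, delay: float = 1.0):
    """Run `await func()` up to max_retries + 1 times, returning None if all attempts fail."""
    last_exception = None
    for attempt in range(max_retries + 1):
        try:
            return await func()
        except Exception as e:
            last_exception = e
            logger.warning(f"Attempt {attempt + 1}/{max_retries + 1} failed: {e}")
            if attempt < max_retries:
                await asyncio.sleep(delay)  # fixed delay between attempts
    logger.error(f"All attempts failed. Last error: {last_exception}")
    return None

# Example (inside an async context, assuming an Evaluator instance):
# metrics = await retry_async(lambda: evaluator.evaluate_program(code, "prog-1"), max_retries=3)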
