Merge pull request #238 from codelion/feat-add-early-stopping

codelion · web-flow · commit 8e8879f25968 · 2025-08-26T18:16:32.000+08:00
add early stopping
diff --git a/configs/default_config.yaml b/configs/default_config.yaml
@@ -13,6 +13,11 @@ random_seed: 42                       # Random seed for reproducibility (null =
 diff_based_evolution: true            # Use diff-based evolution (true) or full rewrites (false)
 max_code_length: 10000                # Maximum allowed code length in characters
 
+# Early stopping settings
+early_stopping_patience: null         # Stop after N iterations without improvement (null = disabled)
+convergence_threshold: 0.001          # Minimum improvement required to reset patience counter
+early_stopping_metric: "combined_score"  # Metric to track for early stopping
+
 # LLM configuration
 llm:
   # Models for evolution
diff --git a/configs/early_stopping_example.yaml b/configs/early_stopping_example.yaml
@@ -0,0 +1,38 @@
+# OpenEvolve Configuration with Early Stopping Example
+# This configuration demonstrates how to use the early stopping feature
+
+# Basic settings
+max_iterations: 1000
+checkpoint_interval: 50
+log_level: "INFO"
+
+# Early stopping configuration: stop evolution after 30 iterations without improvement
+early_stopping_patience: 30          # Stop after 30 iterations without improvement
+convergence_threshold: 0.01          # Minimum improvement of 0.01 required to reset patience
+early_stopping_metric: "combined_score"  # Track the combined_score metric
+
+# LLM configuration
+llm:
+  models:
+    - name: "gpt-4o-mini"
+      weight: 1.0
+  
+  api_base: "https://api.openai.com/v1"
+  temperature: 0.7
+  max_tokens: 4096
+
+# Database configuration
+database:
+  population_size: 50
+  num_islands: 3
+  migration_interval: 20
+
+# Evaluation settings
+evaluator:
+  timeout: 60
+  max_retries: 2
+  parallel_evaluations: 2
+
+# Evolution settings
+diff_based_evolution: true
+max_code_length: 8000
diff --git a/examples/attention_optimization/configs/failing_config.yaml b/examples/attention_optimization/configs/failing_config.yaml
@@ -35,13 +35,16 @@ checkpoints:
   keep_best: true
   save_all_programs: false
 
+# Early stopping settings (moved to top level)
+early_stopping_patience: 50          # Stop after 50 iterations without improvement
+convergence_threshold: 0.001         # Minimum improvement required
+early_stopping_metric: "speedup"     # Track speedup metric
+
 # Optimization targets
 optimization:
   target_metric: "speedup"
   target_value: 1.32  # 32% speedup like AlphaEvolve paper
   minimize: false
-  convergence_threshold: 0.001
-  early_stopping_patience: 50
 
 # Logging
 logging:
diff --git a/openevolve/_version.py b/openevolve/_version.py
@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.7"
+__version__ = "0.2.8"
diff --git a/openevolve/config.py b/openevolve/config.py
@@ -271,6 +271,11 @@ class Config:
     # Evolution settings
     diff_based_evolution: bool = True
     max_code_length: int = 10000
+    
+    # Early stopping settings
+    early_stopping_patience: Optional[int] = None
+    convergence_threshold: float = 0.001
+    early_stopping_metric: str = "combined_score"
 
     @classmethod
     def from_yaml(cls, path: Union[str, Path]) -> "Config":
@@ -381,6 +386,10 @@ def to_dict(self) -> Dict[str, Any]:
             # Evolution settings
             "diff_based_evolution": self.diff_based_evolution,
             "max_code_length": self.max_code_length,
+            # Early stopping settings
+            "early_stopping_patience": self.early_stopping_patience,
+            "convergence_threshold": self.convergence_threshold,
+            "early_stopping_metric": self.early_stopping_metric,
         }
 
     def to_yaml(self, path: Union[str, Path]) -> None:
diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py
@@ -15,6 +15,7 @@
 
 from openevolve.config import Config
 from openevolve.database import Program, ProgramDatabase
+from openevolve.utils.metrics_utils import safe_numeric_average
 
 logger = logging.getLogger(__name__)
 
@@ -145,8 +146,6 @@ def _run_iteration_worker(
         ]
 
         # Sort by metrics for top programs
-        from openevolve.utils.metrics_utils import safe_numeric_average
-
         island_programs.sort(
             key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics)),
             reverse=True,
@@ -425,6 +424,17 @@ async def run_evolution(
         # Island management
         programs_per_island = max(1, max_iterations // (self.config.database.num_islands * 10))
         current_island_counter = 0
+        
+        # Early stopping tracking
+        early_stopping_enabled = self.config.early_stopping_patience is not None
+        if early_stopping_enabled:
+            best_score = float('-inf')
+            iterations_without_improvement = 0
+            logger.info(f"Early stopping enabled: patience={self.config.early_stopping_patience}, "
+                       f"threshold={self.config.convergence_threshold}, "
+                       f"metric={self.config.early_stopping_metric}")
+        else:
+            logger.info("Early stopping disabled")
 
         # Process results as they complete
         while (
@@ -519,8 +529,6 @@ async def run_evolution(
                             "combined_score" not in child_program.metrics
                             and not self._warned_about_combined_score
                         ):
-                            from openevolve.utils.metrics_utils import safe_numeric_average
-
                             avg_score = safe_numeric_average(child_program.metrics)
                             logger.warning(
                                 f"⚠️  No 'combined_score' metric found in evaluation results. "
@@ -563,6 +571,40 @@ async def run_evolution(
                                 )
                                 break
 
+                    # Check early stopping
+                    if early_stopping_enabled and child_program.metrics:
+                        # Get the metric to track for early stopping
+                        current_score = None
+                        if self.config.early_stopping_metric in child_program.metrics:
+                            current_score = child_program.metrics[self.config.early_stopping_metric]
+                        elif self.config.early_stopping_metric == "combined_score":
+                            # No "combined_score" reported: fall back to safe_numeric_average, the same fallback used when ranking programs
+                            current_score = safe_numeric_average(child_program.metrics)
+                        else:
+                            # User specified a custom metric that doesn't exist
+                            logger.warning(f"Early stopping metric '{self.config.early_stopping_metric}' not found, using safe numeric average")
+                            current_score = safe_numeric_average(child_program.metrics)
+
+                        if current_score is not None and isinstance(current_score, (int, float)):
+                            # Check for improvement
+                            improvement = current_score - best_score
+                            if improvement >= self.config.convergence_threshold:
+                                best_score = current_score
+                                iterations_without_improvement = 0
+                                logger.debug(f"New best score: {best_score:.4f} (improvement: {improvement:+.4f})")
+                            else:
+                                iterations_without_improvement += 1
+                                logger.debug(f"No improvement: {iterations_without_improvement}/{self.config.early_stopping_patience}")
+
+                            # Check if we should stop
+                            if iterations_without_improvement >= self.config.early_stopping_patience:
+                                logger.info(
+                                    f"Early stopping triggered at iteration {completed_iteration}: "
+                                    f"No improvement for {iterations_without_improvement} iterations "
+                                    f"(best score: {best_score:.4f})"
+                                )
+                                break
+
             except Exception as e:
                 logger.error(f"Error processing result from iteration {completed_iteration}: {e}")
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,3 +1,3 @@` |
| `1` | `1` | `"""Version information for openevolve package."""` |
| `2` | `2` |  |
| `3` |  | `-__version__ = "0.2.7"` |
|  | `3` | `+__version__ = "0.2.8"` |