
Commit eb1421b

init
1 parent babe3f9 commit eb1421b

File tree

- examples/function_minimization/README.md
- examples/function_minimization/evaluator.py

2 files changed: +84 -56 lines changed


examples/function_minimization/README.md

Lines changed: 44 additions & 37 deletions
@@ -62,56 +62,73 @@ def search_algorithm(iterations=1000, bounds=(-5, 5)):
 After running OpenEvolve, it discovered a simulated annealing algorithm with a completely different approach:
 
 ```python
-def simulated_annealing(bounds=(-5, 5), iterations=1000, step_size=0.1, initial_temperature=100, cooling_rate=0.99):
+def search_algorithm(bounds=(-5, 5), iterations=2000, initial_temperature=100, cooling_rate=0.97, step_size_factor=0.2, step_size_increase_threshold=20):
     """
     Simulated Annealing algorithm for function minimization.
 
     Args:
         bounds: Bounds for the search space (min, max)
         iterations: Number of iterations to run
-        step_size: Step size for perturbing the solution
         initial_temperature: Initial temperature for the simulated annealing process
         cooling_rate: Cooling rate for the simulated annealing process
-
+        step_size_factor: Factor to scale the initial step size by the range
+        step_size_increase_threshold: Number of iterations without improvement before increasing step size
+
     Returns:
         Tuple of (best_x, best_y, best_value)
     """
-    # Initialize with a random point
+    # Initialize
     best_x = np.random.uniform(bounds[0], bounds[1])
     best_y = np.random.uniform(bounds[0], bounds[1])
     best_value = evaluate_function(best_x, best_y)
 
     current_x, current_y = best_x, best_y
     current_value = best_value
     temperature = initial_temperature
+    step_size = (bounds[1] - bounds[0]) * step_size_factor  # Initial step size
+    min_temperature = 1e-6  # Avoid premature convergence
+    no_improvement_count = 0  # Counter for tracking stagnation
+
+    for i in range(iterations):
+        # Adaptive step size and temperature control
+        if i > iterations * 0.75:  # Reduce step size towards the end
+            step_size *= 0.5
+        if no_improvement_count > step_size_increase_threshold:  # Increase step size if stuck
+            step_size *= 1.1
+            no_improvement_count = 0  # Reset the counter
+
+        step_size = min(step_size, (bounds[1] - bounds[0]) * 0.5)  # Limit step size
 
-    for _ in range(iterations):
-        # Perturb the current solution
         new_x = current_x + np.random.uniform(-step_size, step_size)
         new_y = current_y + np.random.uniform(-step_size, step_size)
 
-        # Ensure the new solution is within bounds
+        # Keep the new points within the bounds
         new_x = max(bounds[0], min(new_x, bounds[1]))
         new_y = max(bounds[0], min(new_y, bounds[1]))
 
         new_value = evaluate_function(new_x, new_y)
 
-        # Calculate the acceptance probability
         if new_value < current_value:
+            # Accept the move if it's better
             current_x, current_y = new_x, new_y
             current_value = new_value
+            no_improvement_count = 0  # Reset counter
 
             if new_value < best_value:
+                # Update the best found solution
                 best_x, best_y = new_x, new_y
                 best_value = new_value
         else:
+            # Accept with a certain probability (Simulated Annealing)
             probability = np.exp((current_value - new_value) / temperature)
             if np.random.rand() < probability:
                 current_x, current_y = new_x, new_y
                 current_value = new_value
+                no_improvement_count = 0  # Reset counter
+            else:
+                no_improvement_count += 1  # Increment counter if not improving
 
-        # Cool down the temperature
-        temperature *= cooling_rate
+        temperature = max(temperature * cooling_rate, min_temperature)  # Cool down
 
     return best_x, best_y, best_value
 ```
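As a quick sanity check of the evolved `search_algorithm`, a minimal driver like the one below can exercise it end to end. This is only a sketch: `evaluate_function` here is a hypothetical stand-in (a shifted quadratic with its minimum at (1, -2)) rather than the example's actual benchmark function, and it assumes the `search_algorithm` definition from the diff above is in scope.

```python
import numpy as np

def evaluate_function(x, y):
    # Hypothetical stand-in objective: global minimum of 0 at (1, -2)
    return (x - 1) ** 2 + (y + 2) ** 2

# Assumes search_algorithm from the diff above has been defined
best_x, best_y, best_value = search_algorithm(bounds=(-5, 5), iterations=2000)
print(f"best point: ({best_x:.3f}, {best_y:.3f}), value: {best_value:.6f}")
```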
@@ -120,41 +137,31 @@ def simulated_annealing(bounds=(-5, 5), iterations=1000, step_size=0.1, initial_
 
 Through evolutionary iterations, OpenEvolve discovered several key algorithmic concepts:
 
-1. **Local Search**: Instead of random sampling across the entire space, the evolved algorithm makes small perturbations to promising solutions:
-   ```python
-   new_x = current_x + np.random.uniform(-step_size, step_size)
-   new_y = current_y + np.random.uniform(-step_size, step_size)
-   ```
-
-2. **Temperature-based Acceptance**: The algorithm can escape local minima by occasionally accepting worse solutions:
-   ```python
-   probability = np.exp((current_value - new_value) / temperature)
-   if np.random.rand() < probability:
-       current_x, current_y = new_x, new_y
-       current_value = new_value
-   ```
-
-3. **Cooling Schedule**: The temperature gradually decreases, transitioning from exploration to exploitation:
-   ```python
-   temperature *= cooling_rate
-   ```
-
-4. **Parameter Introduction**: The system discovered the need for additional parameters to control the algorithm's behavior:
-   ```python
-   def simulated_annealing(bounds=(-5, 5), iterations=1000, step_size=0.1, initial_temperature=100, cooling_rate=0.99):
-   ```
+1. **Memory and Exploitation**: The evolved algorithm tracks and updates the best solution seen so far, allowing for continual improvement rather than random restarting.
+
+2. **Exploration via Temperature**: Simulated annealing uses a “temperature” parameter to allow uphill moves early in the search, helping escape local minima that would trap simpler methods.
+
+3. **Adaptive Step Size**: The step size is adjusted dynamically—shrinking as the search converges and expanding if progress stalls—leading to better coverage and faster convergence.
+
+4. **Bounded Moves**: The algorithm ensures all candidate solutions remain within the feasible domain, avoiding wasted evaluations.
+
+5. **Stagnation Handling**: By counting iterations without improvement, the algorithm responds by boosting exploration when progress stalls.
+
+6. **Probabilistic Acceptance**: Moves to worse solutions are allowed with a probability that decays over time, providing a principled way to balance exploration and exploitation.
 
 ## Results
 
 The evolved algorithm shows substantial improvement in finding better solutions:
 
 | Metric | Value |
 |--------|-------|
-| Value Score | 0.677 |
-| Distance Score | 0.258 |
+| Value Score | 0.990 |
+| Distance Score | 0.921 |
+| Standard Deviation Score | 0.900 |
+| Speed Score | 0.466 |
 | Reliability Score | 1.000 |
-| Overall Score | 0.917 |
-| Combined Score | 0.584 |
+| Overall Score | 0.984 |
+| Combined Score | 0.922 |
 
 The simulated annealing algorithm:
 - Achieves higher quality solutions (closer to the global minimum)
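The probabilistic acceptance described in concept 6 of the list above is the classic Metropolis rule. A minimal standalone sketch (the numbers are illustrative, not taken from this example) shows how the probability of accepting the same uphill move decays as the temperature cools:

```python
import numpy as np

def acceptance_probability(current_value, new_value, temperature):
    # Always accept improvements; accept worse moves with a decaying probability
    if new_value < current_value:
        return 1.0
    return np.exp((current_value - new_value) / temperature)

for temperature in (100.0, 10.0, 1.0, 0.1):
    # The same move, worse by 1.0, becomes ever less likely as the system cools
    print(f"T={temperature:>5}: p={acceptance_probability(1.0, 2.0, temperature):.5f}")
```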

examples/function_minimization/evaluator.py

Lines changed: 40 additions & 19 deletions
@@ -5,10 +5,8 @@
 import importlib.util
 import numpy as np
 import time
-import concurrent.futures
-import threading
+import multiprocessing
 import traceback
-import sys
 
 
 def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
@@ -24,14 +22,30 @@ def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
     Returns:
         Result of the function or raises TimeoutError
     """
-    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
-        future = executor.submit(func, *args, **kwargs)
+    def wrapper(queue, func, args, kwargs):
         try:
-            return future.result(timeout=timeout_seconds)
-        except concurrent.futures.TimeoutError:
-            raise TimeoutError(
-                f"Function {func.__name__} timed out after {timeout_seconds} seconds"
-            )
+            result = func(*args, **kwargs)
+            queue.put(('success', result))
+        except Exception as e:
+            queue.put(('error', e))
+
+    queue = multiprocessing.Queue()
+    process = multiprocessing.Process(target=wrapper, args=(queue, func, args, kwargs))
+    process.start()
+    process.join(timeout=timeout_seconds)
+
+    if process.is_alive():
+        process.terminate()
+        process.join()
+        raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")
+
+    if queue.empty():
+        raise TimeoutError("Function ended without returning a result")
+
+    status, result = queue.get()
+    if status == 'error':
+        raise result
+    return result
 
 
 def safe_float(value):
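A brief usage sketch for the new process-based `run_with_timeout` (the `slow` helper below is hypothetical): unlike the old thread-based version, which could only abandon a stuck task, terminating the worker process actually stops it.

```python
import time

def slow(n):
    time.sleep(n)
    return n

# Assumes run_with_timeout from the diff above is in scope
print(run_with_timeout(slow, args=(1,), timeout_seconds=5))   # prints 1
try:
    run_with_timeout(slow, args=(10,), timeout_seconds=2)
except TimeoutError as e:
    print(e)  # Function timed out after 2 seconds
```

One caveat: because `wrapper` is a function nested inside `run_with_timeout`, the `Process` target works under the fork start method (the Linux default) but cannot be pickled under the spawn start method that macOS and Windows use by default.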
@@ -78,6 +92,8 @@ def evaluate(program_path):
 
     # Run multiple trials
     num_trials = 10
+    x_values = []
+    y_values = []
     values = []
     distances = []
     times = []
@@ -119,14 +135,15 @@ def evaluate(program_path):
                 continue
 
             # Calculate metrics
-            x_diff = safe_float(x) - GLOBAL_MIN_X
-            y_diff = safe_float(y) - GLOBAL_MIN_Y
+            x_diff = x - GLOBAL_MIN_X
+            y_diff = y - GLOBAL_MIN_Y
             distance_to_global = np.sqrt(x_diff**2 + y_diff**2)
-            value_difference = abs(value - GLOBAL_MIN_VALUE)
 
-            values.append(float(value))
-            distances.append(float(distance_to_global))
-            times.append(float(end_time - start_time))
+            x_values.append(x)
+            y_values.append(y)
+            values.append(value)
+            distances.append(distance_to_global)
+            times.append(end_time - start_time)
             success_count += 1
 
         except TimeoutError as e:
@@ -164,6 +181,11 @@ def evaluate(program_path):
     distance_score = float(1.0 / (1.0 + avg_distance))
     speed_score = float(1.0 / avg_time) if avg_time > 0 else 0.0
 
+    # Calculate standard deviation scores
+    x_std_score = float(1.0 / (1.0 + np.std(x_values)))
+    y_std_score = float(1.0 / (1.0 + np.std(y_values)))
+    standard_deviation_score = (x_std_score + y_std_score) / 2.0
+
     # Normalize speed score (so it doesn't dominate)
     speed_score = float(min(speed_score, 10.0) / 10.0)
 
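The new spread metric maps the standard deviation of the solutions found across trials into (0, 1]: tightly clustered results score near 1.0, scattered ones lower. A small sketch with made-up trial data:

```python
import numpy as np

clustered = [0.98, 1.02, 1.00, 0.97, 1.03]  # hypothetical best-x values, tightly grouped
scattered = [0.5, 1.5, -0.2, 2.0, 1.0]      # hypothetical best-x values, widely spread

for xs in (clustered, scattered):
    print(round(1.0 / (1.0 + np.std(xs)), 3))  # ~0.978 vs ~0.566
```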
@@ -175,7 +197,7 @@ def evaluate(program_path):
     # Value and distance scores (quality of solution) get 90% of the weight
     # Speed and reliability get only 10% combined
     combined_score = float(
-        0.6 * value_score + 0.3 * distance_score + 0.05 * speed_score + 0.05 * reliability_score
+        0.35 * value_score + 0.35 * distance_score + 0.20 * standard_deviation_score + 0.05 * speed_score + 0.05 * reliability_score
     )
 
     # Also compute an "overall" score that will be the primary metric for selection
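Plugging the scores from the README table in this same commit into the new weighting reproduces the reported combined score, and shows that the quality terms (value, distance, and the new standard deviation score) still carry 0.90 of the total weight:

```python
value_score = 0.990
distance_score = 0.921
standard_deviation_score = 0.900
speed_score = 0.466
reliability_score = 1.000

combined_score = (
    0.35 * value_score
    + 0.35 * distance_score
    + 0.20 * standard_deviation_score
    + 0.05 * speed_score
    + 0.05 * reliability_score
)
print(round(combined_score, 3))  # 0.922, matching the README's Combined Score
```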
@@ -194,6 +216,7 @@ def evaluate(program_path):
     return {
         "value_score": value_score,
         "distance_score": distance_score,
+        "standard_deviation_score": standard_deviation_score,
         "speed_score": speed_score,
         "reliability_score": reliability_score,
         "combined_score": combined_score,
@@ -282,8 +305,6 @@ def evaluate_stage1(program_path):
     # Basic metrics with overall score
     return {
         "runs_successfully": 1.0,
-        "value": float(value),
-        "distance": distance,
         "value_score": value_score,
         "distance_score": distance_score,
         "overall_score": solution_quality,  # This becomes a strong guiding metric
