Commit 68665a5

Merge pull request #140 from codelion/fix-islands-evolution
Fix islands evolution
2 parents 7d0178e + a4a3847 commit 68665a5

File tree

10 files changed: 1361 additions & 70 deletions


examples/rust_adaptive_sort/config.yaml

Lines changed: 2 additions & 6 deletions
@@ -49,9 +49,5 @@ evaluator:
   timeout: 60 # Rust compilation can take time
   parallel_evaluations: 3
 
-  # Use cascade evaluation for performance testing
-  cascade_evaluation: true
-  cascade_thresholds:
-    - 0.5 # Compilation success and basic correctness
-    - 0.7 # Good performance
-    - 0.85 # Excellent adaptability
+  # Direct evaluation - evaluator doesn't implement cascade functions
+  cascade_evaluation: false
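
Note: the Rust example's evaluator defines only a single evaluate function, so cascade evaluation is switched off here rather than being left to silently fall back. For contrast, a minimal sketch of what a cascade-capable evaluator module would expose — the stage function names are the ones the new validator (below) checks for; the metric names and scores are purely illustrative, not taken from the actual rust_adaptive_sort evaluator:

# Hypothetical sketch of a cascade-capable evaluator module.
def evaluate_stage1(program_path: str) -> dict:
    # Cheap gate: compile and run basic correctness checks.
    return {"compile_ok": 1.0, "basic_correctness": 0.9}

def evaluate_stage2(program_path: str) -> dict:
    # More expensive: benchmark performance on representative inputs.
    return {"performance": 0.75}

def evaluate_stage3(program_path: str) -> dict:
    # Most expensive: measure adaptability across input distributions.
    return {"adaptability": 0.85}

def evaluate(program_path: str) -> dict:
    # Direct entry point, used when cascade_evaluation is false.
    return {**evaluate_stage1(program_path), **evaluate_stage2(program_path)}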

openevolve/database.py

Lines changed: 279 additions & 47 deletions
Large diffs are not rendered by default.

openevolve/evaluator.py

Lines changed: 65 additions & 11 deletions
@@ -89,10 +89,42 @@ def _load_evaluation_function(self) -> None:
 
             self.evaluate_function = module.evaluate
             logger.info(f"Successfully loaded evaluation function from {self.evaluation_file}")
+
+            # Validate cascade configuration
+            self._validate_cascade_configuration(module)
         except Exception as e:
             logger.error(f"Error loading evaluation function: {str(e)}")
             raise
 
+    def _validate_cascade_configuration(self, module) -> None:
+        """
+        Validate cascade evaluation configuration and warn about potential issues
+
+        Args:
+            module: The loaded evaluation module
+        """
+        if self.config.cascade_evaluation:
+            # Check if cascade functions exist
+            has_stage1 = hasattr(module, "evaluate_stage1")
+            has_stage2 = hasattr(module, "evaluate_stage2")
+            has_stage3 = hasattr(module, "evaluate_stage3")
+
+            if not has_stage1:
+                logger.warning(
+                    f"Configuration has 'cascade_evaluation: true' but evaluator "
+                    f"'{self.evaluation_file}' does not define 'evaluate_stage1' function. "
+                    f"This will fall back to direct evaluation, making the cascade setting useless. "
+                    f"Consider setting 'cascade_evaluation: false' or implementing cascade functions."
+                )
+            elif not (has_stage2 or has_stage3):
+                logger.warning(
+                    f"Evaluator '{self.evaluation_file}' defines 'evaluate_stage1' but no additional "
+                    f"cascade stages (evaluate_stage2, evaluate_stage3). Consider implementing "
+                    f"multi-stage evaluation for better cascade benefits."
+                )
+            else:
+                logger.debug(f"Cascade evaluation properly configured with available stage functions")
+
     async def evaluate_program(
         self,
         program_code: str,
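
The validation is purely advisory: it logs a warning or debug message but never raises, so a misconfigured run still proceeds via direct evaluation. A standalone sketch of the same branching, returning labels instead of logging so the outcomes are easy to test (check_cascade_support is a hypothetical name, not part of openevolve):

import types

def check_cascade_support(module: types.ModuleType, cascade_enabled: bool) -> str:
    # Mirrors _validate_cascade_configuration's decision tree.
    if not cascade_enabled:
        return "direct"
    if not hasattr(module, "evaluate_stage1"):
        return "falls_back_to_direct"  # first warning case
    if not (hasattr(module, "evaluate_stage2") or hasattr(module, "evaluate_stage3")):
        return "single_stage_only"     # second warning case
    return "fully_configured"          # debug case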
@@ -273,15 +305,15 @@ def get_pending_artifacts(self, program_id: str) -> Optional[Dict[str, Union[str
         """
         return self._pending_artifacts.pop(program_id, None)
 
-    async def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
+    async def _direct_evaluate(self, program_path: str) -> Union[Dict[str, float], EvaluationResult]:
         """
         Directly evaluate a program using the evaluation function with timeout
 
         Args:
             program_path: Path to the program file
 
         Returns:
-            Dictionary of metric name to score
+            Dictionary of metrics or EvaluationResult with metrics and artifacts
 
         Raises:
             asyncio.TimeoutError: If evaluation exceeds timeout
@@ -296,11 +328,8 @@ async def run_evaluation():
         # Run the evaluation with timeout - let exceptions bubble up for retry handling
         result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)
 
-        # Validate result
-        if not isinstance(result, dict):
-            logger.warning(f"Evaluation returned non-dictionary result: {result}")
-            return {"error": 0.0}
-
+        # Return result as-is to be processed by _process_evaluation_result
+        # This supports both dict and EvaluationResult returns, just like _cascade_evaluate
         return result
 
     async def _cascade_evaluate(
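
With the isinstance check gone, _direct_evaluate can hand back either a plain metrics dict or an EvaluationResult; normalization happens downstream in _process_evaluation_result, whose diff is not shown here. A minimal sketch of what that normalization plausibly looks like, assuming an EvaluationResult dataclass with metrics and artifacts fields (the real helper in openevolve may differ):

from dataclasses import dataclass, field
from typing import Dict, Union

@dataclass
class EvaluationResult:
    metrics: Dict[str, float] = field(default_factory=dict)
    artifacts: dict = field(default_factory=dict)

def process_evaluation_result(result: Union[Dict[str, float], EvaluationResult]) -> EvaluationResult:
    # A bare dict is treated as metrics-only; an EvaluationResult
    # passes through with its artifacts intact.
    if isinstance(result, dict):
        return EvaluationResult(metrics=result)
    return result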
@@ -354,13 +383,14 @@ async def run_stage1():
             )
         except Exception as e:
             logger.error(f"Error in stage 1 evaluation: {str(e)}")
-            # Capture stage 1 failure as artifacts
+            # Capture stage 1 failure with enhanced context
+            error_context = self._create_cascade_error_context("stage1", e)
             return EvaluationResult(
                 metrics={"stage1_passed": 0.0, "error": 0.0},
                 artifacts={
                     "stderr": str(e),
                     "traceback": traceback.format_exc(),
-                    "failure_stage": "stage1",
+                    **error_context,
                 },
             )
 
@@ -481,13 +511,14 @@ async def run_stage3():
 
         except Exception as e:
             logger.error(f"Error in cascade evaluation: {str(e)}")
-            # Return proper cascade failure result instead of re-raising
+            # Return proper cascade failure result with enhanced context
+            error_context = self._create_cascade_error_context("cascade_setup", e)
             return EvaluationResult(
                 metrics={"stage1_passed": 0.0, "error": 0.0},
                 artifacts={
                     "stderr": str(e),
                     "traceback": traceback.format_exc(),
-                    "failure_stage": "cascade_setup",
+                    **error_context,
                 },
             )
 
@@ -582,6 +613,29 @@ async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[s
             traceback.print_exc()
             return {}
 
+    def _create_cascade_error_context(self, stage: str, error: Exception) -> dict:
+        """
+        Create rich error context for cascade failures
+
+        Args:
+            stage: The stage where the error occurred
+            error: The exception that was raised
+
+        Returns:
+            Dictionary with enhanced error context
+        """
+        import time
+        return {
+            "failure_stage": stage,
+            "error_type": type(error).__name__,
+            "error_message": str(error),
+            "timestamp": time.time(),
+            "cascade_config": self.config.cascade_evaluation,
+            "cascade_thresholds": getattr(self.config, 'cascade_thresholds', []),
+            "timeout_config": self.config.timeout,
+            "evaluation_file": self.evaluation_file,
+        }
+
     def _passes_threshold(self, metrics: Dict[str, float], threshold: float) -> bool:
         """
         Check if metrics pass a threshold
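
Put together, a stage 1 failure now surfaces an EvaluationResult whose artifacts carry the enriched context: the stderr/traceback keys from the call site plus everything from _create_cascade_error_context. An invented example of the resulting artifacts dict (every value below is illustrative):

artifacts = {
    "stderr": "division by zero",
    "traceback": "Traceback (most recent call last): ...",
    "failure_stage": "stage1",
    "error_type": "ZeroDivisionError",
    "error_message": "division by zero",
    "timestamp": 1718000000.0,           # time.time() at failure
    "cascade_config": True,              # config.cascade_evaluation
    "cascade_thresholds": [0.5, 0.75, 0.9],
    "timeout_config": 300,
    "evaluation_file": "path/to/evaluator.py",
}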

openevolve/iteration.py

Lines changed: 6 additions & 4 deletions
@@ -53,16 +53,18 @@ async def run_iteration_with_shared_db(
     # Get artifacts for the parent program if available
     parent_artifacts = database.get_artifacts(parent.id)
 
-    # Get actual top programs for prompt context (separate from inspirations)
-    actual_top_programs = database.get_top_programs(5)
+    # Get island-specific top programs for prompt context (maintain island isolation)
+    parent_island = parent.metadata.get("island", database.current_island)
+    island_top_programs = database.get_top_programs(5, island_idx=parent_island)
+    island_previous_programs = database.get_top_programs(3, island_idx=parent_island)
 
     # Build prompt
     prompt = prompt_sampler.build_prompt(
         current_program=parent.code,
         parent_program=parent.code,
         program_metrics=parent.metrics,
-        previous_programs=[p.to_dict() for p in database.get_top_programs(3)],
-        top_programs=[p.to_dict() for p in actual_top_programs],
+        previous_programs=[p.to_dict() for p in island_previous_programs],
+        top_programs=[p.to_dict() for p in island_top_programs],
         inspirations=[p.to_dict() for p in inspirations],
         language=config.language,
         evolution_round=iteration,
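
This is the heart of the islands fix: prompt context is now drawn only from the parent's own island rather than the global top programs, preserving the independence that makes island-based evolution useful. The matching database-side change is in the large openevolve/database.py diff above (not rendered). A rough, assumption-laden sketch of what an island-aware get_top_programs could look like — the data-structure names here are guesses, not the actual implementation:

# Assumes `islands` maps an island index to a set of program ids and
# each program carries a metrics dict with a combined score.
def get_top_programs(programs, islands, n=5, island_idx=None):
    if island_idx is None:
        candidates = list(programs.values())  # global view
    else:
        candidates = [programs[pid] for pid in islands[island_idx]]  # island-local view
    # Rank best-first; programs missing the score sort last.
    return sorted(
        candidates,
        key=lambda p: p.metrics.get("combined_score", 0.0),
        reverse=True,
    )[:n]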

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "openevolve"
-version = "0.0.14"
+version = "0.0.15"
 description = "Open-source implementation of AlphaEvolve"
 readme = "README.md"
 requires-python = ">=3.9"

setup.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 setup(
     name="openevolve",
-    version="0.0.14",
+    version="0.0.15",
     packages=find_packages(),
     include_package_data=True,
 )
