diff --git a/examples/rust_adaptive_sort/config.yaml b/examples/rust_adaptive_sort/config.yaml index 0f5649d5f..497942891 100644 --- a/examples/rust_adaptive_sort/config.yaml +++ b/examples/rust_adaptive_sort/config.yaml @@ -49,9 +49,5 @@ evaluator: timeout: 60 # Rust compilation can take time parallel_evaluations: 3 - # Use cascade evaluation for performance testing - cascade_evaluation: true - cascade_thresholds: - - 0.5 # Compilation success and basic correctness - - 0.7 # Good performance - - 0.85 # Excellent adaptability \ No newline at end of file + # Direct evaluation - evaluator doesn't implement cascade functions + cascade_evaluation: false \ No newline at end of file diff --git a/openevolve/database.py b/openevolve/database.py index 8aba55283..253b66fd5 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -122,6 +122,9 @@ def __init__(self, config: DatabaseConfig): # Track the absolute best program separately self.best_program_id: Optional[str] = None + + # Track best program per island for proper island-based evolution + self.island_best_programs: List[Optional[str]] = [None] * config.num_islands # Track the last iteration number (for resuming) self.last_iteration: int = 0 @@ -186,6 +189,28 @@ def add( should_replace = self._is_better(program, self.programs[existing_program_id]) if should_replace: + # Log significant MAP-Elites events + coords_dict = {self.config.feature_dimensions[i]: feature_coords[i] for i in range(len(feature_coords))} + + if feature_key not in self.feature_map: + # New cell occupation + logging.info("New MAP-Elites cell occupied: %s", coords_dict) + # Check coverage milestone + total_possible_cells = self.feature_bins ** len(self.config.feature_dimensions) + coverage = (len(self.feature_map) + 1) / total_possible_cells + if coverage in [0.1, 0.25, 0.5, 0.75, 0.9]: + logging.info("MAP-Elites coverage reached %.1f%% (%d/%d cells)", + coverage * 100, len(self.feature_map) + 1, total_possible_cells) + else: + # Cell replacement - existing program being replaced + existing_program_id = self.feature_map[feature_key] + if existing_program_id in self.programs: + existing_program = self.programs[existing_program_id] + new_fitness = safe_numeric_average(program.metrics) + existing_fitness = safe_numeric_average(existing_program.metrics) + logging.info("MAP-Elites cell improved: %s (fitness: %.3f -> %.3f)", + coords_dict, existing_fitness, new_fitness) + self.feature_map[feature_key] = program.id # Add to specific island (not random!) 
@@ -205,6 +230,9 @@ def add( # Update the absolute best program tracking (after population enforcement) self._update_best_program(program) + + # Update island-specific best program tracking + self._update_island_best_program(program, island_idx) # Save to disk if configured if self.config.db_path: @@ -315,13 +343,14 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]: return sorted_programs[0] if sorted_programs else None - def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Program]: + def get_top_programs(self, n: int = 10, metric: Optional[str] = None, island_idx: Optional[int] = None) -> List[Program]: """ Get the top N programs based on a metric Args: n: Number of programs to return metric: Metric to use for ranking (uses average if None) + island_idx: If specified, only return programs from this island Returns: List of top programs @@ -329,17 +358,32 @@ def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Pr if not self.programs: return [] + # Get candidate programs + if island_idx is not None: + # Island-specific query + island_programs = [ + self.programs[pid] for pid in self.islands[island_idx] + if pid in self.programs + ] + candidates = island_programs + else: + # Global query + candidates = list(self.programs.values()) + + if not candidates: + return [] + if metric: # Sort by specific metric sorted_programs = sorted( - [p for p in self.programs.values() if metric in p.metrics], + [p for p in candidates if metric in p.metrics], key=lambda p: p.metrics[metric], reverse=True, ) else: # Sort by average of all numeric metrics sorted_programs = sorted( - self.programs.values(), + candidates, key=lambda p: safe_numeric_average(p.metrics), reverse=True, ) @@ -379,6 +423,7 @@ def save(self, path: Optional[str] = None, iteration: int = 0) -> None: "islands": [list(island) for island in self.islands], "archive": list(self.archive), "best_program_id": self.best_program_id, + "island_best_programs": self.island_best_programs, "last_iteration": iteration or self.last_iteration, "current_island": self.current_island, "island_generations": self.island_generations, @@ -412,6 +457,7 @@ def load(self, path: str) -> None: saved_islands = metadata.get("islands", []) self.archive = set(metadata.get("archive", [])) self.best_program_id = metadata.get("best_program_id") + self.island_best_programs = metadata.get("island_best_programs", [None] * len(saved_islands)) self.last_iteration = metadata.get("last_iteration", 0) self.current_island = metadata.get("current_island", 0) self.island_generations = metadata.get("island_generations", [0] * len(saved_islands)) @@ -440,6 +486,10 @@ def load(self, path: str) -> None: # Ensure island_generations list has correct length if len(self.island_generations) != len(self.islands): self.island_generations = [0] * len(self.islands) + + # Ensure island_best_programs list has correct length + if len(self.island_best_programs) != len(self.islands): + self.island_best_programs = [None] * len(self.islands) logger.info(f"Loaded database with {len(self.programs)} programs from {path}") @@ -487,6 +537,9 @@ def _reconstruct_islands(self, saved_islands: List[List[str]]) -> None: feature_keys_to_remove.append(key) for key in feature_keys_to_remove: del self.feature_map[key] + + # Clean up island best programs - remove stale references + self._cleanup_stale_island_bests() # Check best program if self.best_program_id and self.best_program_id not in self.programs: @@ -613,7 +666,8 @@ def 
_calculate_feature_coords(self, program: Program) -> List[int]: else: # Default to middle bin if feature not found coords.append(self.feature_bins // 2) - logging.info( + # Only log coordinates at debug level for troubleshooting + logging.debug( "MAP-Elites coords: %s", str({self.config.feature_dimensions[i]: coords[i] for i in range(len(coords))}), ) @@ -748,6 +802,53 @@ def _update_best_program(self, program: Program) -> None: else: logger.info(f"New best program {program.id} replaces {old_id}") + def _update_island_best_program(self, program: Program, island_idx: int) -> None: + """ + Update the best program tracking for a specific island + + Args: + program: Program to consider as the new best for the island + island_idx: Island index + """ + # Ensure island_idx is valid + if island_idx >= len(self.island_best_programs): + logger.warning(f"Invalid island index {island_idx}, skipping island best update") + return + + # If island doesn't have a best program yet, this becomes the best + current_island_best_id = self.island_best_programs[island_idx] + if current_island_best_id is None: + self.island_best_programs[island_idx] = program.id + logger.debug(f"Set initial best program for island {island_idx} to {program.id}") + return + + # Check if current best still exists + if current_island_best_id not in self.programs: + logger.warning( + f"Island {island_idx} best program {current_island_best_id} no longer exists, updating to {program.id}" + ) + self.island_best_programs[island_idx] = program.id + return + + current_island_best = self.programs[current_island_best_id] + + # Update if the new program is better + if self._is_better(program, current_island_best): + old_id = current_island_best_id + self.island_best_programs[island_idx] = program.id + + # Log the change + if "combined_score" in program.metrics and "combined_score" in current_island_best.metrics: + old_score = current_island_best.metrics["combined_score"] + new_score = program.metrics["combined_score"] + score_diff = new_score - old_score + logger.debug( + f"Island {island_idx}: New best program {program.id} replaces {old_id} " + f"(combined_score: {old_score:.4f} → {new_score:.4f}, +{score_diff:.4f})" + ) + else: + logger.debug(f"Island {island_idx}: New best program {program.id} replaces {old_id}") + def _sample_parent(self) -> Program: """ Sample a parent program from the current island for the next evolution step @@ -869,91 +970,124 @@ def _sample_random_parent(self) -> Program: def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]: """ - Sample inspiration programs for the next evolution step + Sample inspiration programs for the next evolution step. + + For proper island-based evolution, inspirations are sampled ONLY from the + current island, maintaining genetic isolation between islands. 
Args: parent: Parent program n: Number of inspirations to sample Returns: - List of inspiration programs + List of inspiration programs from the current island """ inspirations = [] + + # Get the parent's island (should be current_island) + parent_island = parent.metadata.get("island", self.current_island) + + # Get all programs from the current island + island_program_ids = list(self.islands[parent_island]) + island_programs = [self.programs[pid] for pid in island_program_ids if pid in self.programs] + + if not island_programs: + logger.warning(f"Island {parent_island} has no programs for inspiration sampling") + return [] - # Always include the absolute best program if available and different from parent + # Include the island's best program if available and different from parent + island_best_id = self.island_best_programs[parent_island] if ( - self.best_program_id is not None - and self.best_program_id != parent.id - and self.best_program_id in self.programs + island_best_id is not None + and island_best_id != parent.id + and island_best_id in self.programs ): - best_program = self.programs[self.best_program_id] - inspirations.append(best_program) - logger.debug(f"Including best program {self.best_program_id} in inspirations") - elif self.best_program_id is not None and self.best_program_id not in self.programs: - # Clean up stale best program reference + island_best = self.programs[island_best_id] + inspirations.append(island_best) + logger.debug(f"Including island {parent_island} best program {island_best_id} in inspirations") + elif island_best_id is not None and island_best_id not in self.programs: + # Clean up stale island best reference logger.warning( - f"Best program {self.best_program_id} no longer exists, clearing reference" + f"Island {parent_island} best program {island_best_id} no longer exists, clearing reference" ) - self.best_program_id = None + self.island_best_programs[parent_island] = None - # Add top programs as inspirations + # Add top programs from the island as inspirations top_n = max(1, int(n * self.config.elite_selection_ratio)) - top_programs = self.get_top_programs(n=top_n) - for program in top_programs: + top_island_programs = self.get_top_programs(n=top_n, island_idx=parent_island) + for program in top_island_programs: if program.id not in [p.id for p in inspirations] and program.id != parent.id: inspirations.append(program) - # Add diverse programs using config.num_diverse_programs - if len(self.programs) > n and len(inspirations) < n: - # Calculate how many diverse programs to add (up to remaining slots) + # Add diverse programs from within the island + if len(island_programs) > n and len(inspirations) < n: remaining_slots = n - len(inspirations) - # Sample from different feature cells for diversity + # Try to sample from different feature cells within the island feature_coords = self._calculate_feature_coords(parent) - - # Get programs from nearby feature cells nearby_programs = [] - for _ in range(remaining_slots): + + # Create a mapping of feature cells to island programs for efficient lookup + island_feature_map = {} + for prog_id in island_program_ids: + if prog_id in self.programs: + prog = self.programs[prog_id] + prog_coords = self._calculate_feature_coords(prog) + cell_key = self._feature_coords_to_key(prog_coords) + island_feature_map[cell_key] = prog_id + + # Try to find programs from nearby feature cells within the island + for _ in range(remaining_slots * 3): # Try more times to find nearby programs # Perturb coordinates perturbed_coords = [ 
- max(0, min(self.feature_bins - 1, c + random.randint(-1, 1))) + max(0, min(self.feature_bins - 1, c + random.randint(-2, 2))) for c in feature_coords ] - - # Try to get program from this cell + cell_key = self._feature_coords_to_key(perturbed_coords) - if cell_key in self.feature_map: - program_id = self.feature_map[cell_key] - # Check if program still exists before adding + if cell_key in island_feature_map: + program_id = island_feature_map[cell_key] if ( program_id != parent.id and program_id not in [p.id for p in inspirations] + and program_id not in [p.id for p in nearby_programs] and program_id in self.programs ): nearby_programs.append(self.programs[program_id]) - elif program_id not in self.programs: - # Clean up stale reference in feature_map - logger.debug(f"Removing stale program {program_id} from feature_map") - del self.feature_map[cell_key] + if len(nearby_programs) >= remaining_slots: + break - # If we need more, add random programs + # If we still need more, add random programs from the island if len(inspirations) + len(nearby_programs) < n: remaining = n - len(inspirations) - len(nearby_programs) - all_ids = set(self.programs.keys()) + + # Get available programs from the island excluded_ids = ( {parent.id} .union(p.id for p in inspirations) .union(p.id for p in nearby_programs) ) - available_ids = list(all_ids - excluded_ids) - - if available_ids: - random_ids = random.sample(available_ids, min(remaining, len(available_ids))) + available_island_ids = [ + pid for pid in island_program_ids + if pid not in excluded_ids and pid in self.programs + ] + + if available_island_ids: + random_ids = random.sample( + available_island_ids, + min(remaining, len(available_island_ids)) + ) random_programs = [self.programs[pid] for pid in random_ids] nearby_programs.extend(random_programs) inspirations.extend(nearby_programs) + # Log island isolation info + logger.debug( + f"Sampled {len(inspirations)} inspirations from island {parent_island} " + f"(island has {len(island_programs)} programs total)" + ) + return inspirations[:n] def _enforce_population_limit(self, exclude_program_id: Optional[str] = None) -> None: @@ -1030,6 +1164,9 @@ def _enforce_population_limit(self, exclude_program_id: Optional[str] = None) -> logger.debug(f"Removed program {program_id} due to population limit") logger.info(f"Population size after cleanup: {len(self.programs)}") + + # Clean up any stale island best program references after removal + self._cleanup_stale_island_bests() # Island management methods def set_current_island(self, island_idx: int) -> None: @@ -1103,14 +1240,106 @@ def migrate_programs(self) -> None: # Add to target island self.islands[target_island].add(migrant_copy.id) self.programs[migrant_copy.id] = migrant_copy + + # Update island-specific best program if migrant is better + self._update_island_best_program(migrant_copy, target_island) - logger.debug( - f"Migrated program {migrant.id} from island {i} to island {target_island}" - ) + # Log migration with MAP-Elites coordinates + feature_coords = self._calculate_feature_coords(migrant_copy) + coords_dict = {self.config.feature_dimensions[j]: feature_coords[j] for j in range(len(feature_coords))} + logger.info("Program migrated to island %d at MAP-Elites coords: %s", + target_island, coords_dict) # Update last migration generation self.last_migration_generation = max(self.island_generations) logger.info(f"Migration completed at generation {self.last_migration_generation}") + + # Validate migration results + 
self._validate_migration_results() + + def _validate_migration_results(self) -> None: + """ + Validate migration didn't create inconsistencies + + Checks that: + 1. Program island metadata matches actual island assignment + 2. No programs are assigned to multiple islands + 3. All island best programs exist and are in correct islands + """ + seen_program_ids = set() + + for i, island in enumerate(self.islands): + for program_id in island: + # Check for duplicate assignments + if program_id in seen_program_ids: + logger.error(f"Program {program_id} assigned to multiple islands") + continue + seen_program_ids.add(program_id) + + # Check program exists + if program_id not in self.programs: + logger.warning(f"Island {i} contains nonexistent program {program_id}") + continue + + # Check metadata consistency + program = self.programs[program_id] + stored_island = program.metadata.get("island") + if stored_island != i: + logger.warning( + f"Island mismatch for program {program_id}: " + f"in island {i} but metadata says {stored_island}" + ) + + # Validate island best programs + for i, best_id in enumerate(self.island_best_programs): + if best_id is not None: + if best_id not in self.programs: + logger.warning(f"Island {i} best program {best_id} does not exist") + elif best_id not in self.islands[i]: + logger.warning(f"Island {i} best program {best_id} not in island") + + def _cleanup_stale_island_bests(self) -> None: + """ + Remove stale island best program references + + Cleans up references to programs that no longer exist in the database + or are not actually in their assigned islands. + """ + cleaned_count = 0 + + for i, best_id in enumerate(self.island_best_programs): + if best_id is not None: + should_clear = False + + # Check if program still exists + if best_id not in self.programs: + logger.debug(f"Clearing stale island {i} best program {best_id} (program deleted)") + should_clear = True + # Check if program is still in the island + elif best_id not in self.islands[i]: + logger.debug(f"Clearing stale island {i} best program {best_id} (not in island)") + should_clear = True + + if should_clear: + self.island_best_programs[i] = None + cleaned_count += 1 + + if cleaned_count > 0: + logger.info(f"Cleaned up {cleaned_count} stale island best program references") + + # Recalculate best programs for islands that were cleared + for i, best_id in enumerate(self.island_best_programs): + if best_id is None and len(self.islands[i]) > 0: + # Find new best program for this island + island_programs = [self.programs[pid] for pid in self.islands[i] if pid in self.programs] + if island_programs: + # Sort by fitness and update + best_program = max( + island_programs, + key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics)) + ) + self.island_best_programs[i] = best_program.id + logger.debug(f"Recalculated island {i} best program: {best_program.id}") def get_island_stats(self) -> List[dict]: """Get statistics for each island""" @@ -1214,10 +1443,13 @@ def log_island_status(self) -> None: logger.info("Island Status:") for stat in stats: current_marker = " *" if stat["is_current"] else " " + island_idx = stat['island'] + island_best_id = self.island_best_programs[island_idx] if island_idx < len(self.island_best_programs) else None + best_indicator = f" (best: {island_best_id})" if island_best_id else "" logger.info( f"{current_marker} Island {stat['island']}: {stat['population_size']} programs, " f"best={stat['best_score']:.4f}, avg={stat['average_score']:.4f}, " - 
f"diversity={stat['diversity']:.2f}, gen={stat['generation']}" + f"diversity={stat['diversity']:.2f}, gen={stat['generation']}{best_indicator}" ) # Artifact storage and retrieval methods diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py index dfe966f50..2ab93f361 100644 --- a/openevolve/evaluator.py +++ b/openevolve/evaluator.py @@ -89,10 +89,42 @@ def _load_evaluation_function(self) -> None: self.evaluate_function = module.evaluate logger.info(f"Successfully loaded evaluation function from {self.evaluation_file}") + + # Validate cascade configuration + self._validate_cascade_configuration(module) except Exception as e: logger.error(f"Error loading evaluation function: {str(e)}") raise + def _validate_cascade_configuration(self, module) -> None: + """ + Validate cascade evaluation configuration and warn about potential issues + + Args: + module: The loaded evaluation module + """ + if self.config.cascade_evaluation: + # Check if cascade functions exist + has_stage1 = hasattr(module, "evaluate_stage1") + has_stage2 = hasattr(module, "evaluate_stage2") + has_stage3 = hasattr(module, "evaluate_stage3") + + if not has_stage1: + logger.warning( + f"Configuration has 'cascade_evaluation: true' but evaluator " + f"'{self.evaluation_file}' does not define 'evaluate_stage1' function. " + f"This will fall back to direct evaluation, making the cascade setting useless. " + f"Consider setting 'cascade_evaluation: false' or implementing cascade functions." + ) + elif not (has_stage2 or has_stage3): + logger.warning( + f"Evaluator '{self.evaluation_file}' defines 'evaluate_stage1' but no additional " + f"cascade stages (evaluate_stage2, evaluate_stage3). Consider implementing " + f"multi-stage evaluation for better cascade benefits." + ) + else: + logger.debug(f"Cascade evaluation properly configured with available stage functions") + async def evaluate_program( self, program_code: str, @@ -273,7 +305,7 @@ def get_pending_artifacts(self, program_id: str) -> Optional[Dict[str, Union[str """ return self._pending_artifacts.pop(program_id, None) - async def _direct_evaluate(self, program_path: str) -> Dict[str, float]: + async def _direct_evaluate(self, program_path: str) -> Union[Dict[str, float], EvaluationResult]: """ Directly evaluate a program using the evaluation function with timeout @@ -281,7 +313,7 @@ async def _direct_evaluate(self, program_path: str) -> Dict[str, float]: program_path: Path to the program file Returns: - Dictionary of metric name to score + Dictionary of metrics or EvaluationResult with metrics and artifacts Raises: asyncio.TimeoutError: If evaluation exceeds timeout @@ -296,11 +328,8 @@ async def run_evaluation(): # Run the evaluation with timeout - let exceptions bubble up for retry handling result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout) - # Validate result - if not isinstance(result, dict): - logger.warning(f"Evaluation returned non-dictionary result: {result}") - return {"error": 0.0} - + # Return result as-is to be processed by _process_evaluation_result + # This supports both dict and EvaluationResult returns, just like _cascade_evaluate return result async def _cascade_evaluate( @@ -354,13 +383,14 @@ async def run_stage1(): ) except Exception as e: logger.error(f"Error in stage 1 evaluation: {str(e)}") - # Capture stage 1 failure as artifacts + # Capture stage 1 failure with enhanced context + error_context = self._create_cascade_error_context("stage1", e) return EvaluationResult( metrics={"stage1_passed": 0.0, "error": 0.0}, 
artifacts={ "stderr": str(e), "traceback": traceback.format_exc(), - "failure_stage": "stage1", + **error_context, }, ) @@ -481,13 +511,14 @@ async def run_stage3(): except Exception as e: logger.error(f"Error in cascade evaluation: {str(e)}") - # Return proper cascade failure result instead of re-raising + # Return proper cascade failure result with enhanced context + error_context = self._create_cascade_error_context("cascade_setup", e) return EvaluationResult( metrics={"stage1_passed": 0.0, "error": 0.0}, artifacts={ "stderr": str(e), "traceback": traceback.format_exc(), - "failure_stage": "cascade_setup", + **error_context, }, ) @@ -582,6 +613,29 @@ async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[s traceback.print_exc() return {} + def _create_cascade_error_context(self, stage: str, error: Exception) -> dict: + """ + Create rich error context for cascade failures + + Args: + stage: The stage where the error occurred + error: The exception that was raised + + Returns: + Dictionary with enhanced error context + """ + import time + return { + "failure_stage": stage, + "error_type": type(error).__name__, + "error_message": str(error), + "timestamp": time.time(), + "cascade_config": self.config.cascade_evaluation, + "cascade_thresholds": getattr(self.config, 'cascade_thresholds', []), + "timeout_config": self.config.timeout, + "evaluation_file": self.evaluation_file, + } + def _passes_threshold(self, metrics: Dict[str, float], threshold: float) -> bool: """ Check if metrics pass a threshold diff --git a/openevolve/iteration.py b/openevolve/iteration.py index 98db88f09..11d3453a8 100644 --- a/openevolve/iteration.py +++ b/openevolve/iteration.py @@ -53,16 +53,18 @@ async def run_iteration_with_shared_db( # Get artifacts for the parent program if available parent_artifacts = database.get_artifacts(parent.id) - # Get actual top programs for prompt context (separate from inspirations) - actual_top_programs = database.get_top_programs(5) + # Get island-specific top programs for prompt context (maintain island isolation) + parent_island = parent.metadata.get("island", database.current_island) + island_top_programs = database.get_top_programs(5, island_idx=parent_island) + island_previous_programs = database.get_top_programs(3, island_idx=parent_island) # Build prompt prompt = prompt_sampler.build_prompt( current_program=parent.code, parent_program=parent.code, program_metrics=parent.metrics, - previous_programs=[p.to_dict() for p in database.get_top_programs(3)], - top_programs=[p.to_dict() for p in actual_top_programs], + previous_programs=[p.to_dict() for p in island_previous_programs], + top_programs=[p.to_dict() for p in island_top_programs], inspirations=[p.to_dict() for p in inspirations], language=config.language, evolution_round=iteration, diff --git a/pyproject.toml b/pyproject.toml index abe90c44f..cc41df178 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "openevolve" -version = "0.0.14" +version = "0.0.15" description = "Open-source implementation of AlphaEvolve" readme = "README.md" requires-python = ">=3.9" diff --git a/setup.py b/setup.py index e876b1c90..4db6920e8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="openevolve", - version="0.0.14", + version="0.0.15", packages=find_packages(), include_package_data=True, ) diff --git a/tests/test_cascade_validation.py b/tests/test_cascade_validation.py new file mode 100644 index 000000000..0464b4278 --- /dev/null +++ 
b/tests/test_cascade_validation.py @@ -0,0 +1,301 @@ +""" +Tests for cascade evaluation validation functionality in openevolve.evaluator +""" + +import unittest +import tempfile +import os +from unittest.mock import patch, MagicMock +from openevolve.config import Config +from openevolve.evaluator import Evaluator +from openevolve.database import EvaluationResult + + +class TestCascadeValidation(unittest.TestCase): + """Tests for cascade evaluation configuration validation""" + + def setUp(self): + """Set up test evaluator with cascade validation""" + self.config = Config() + + # Create temporary evaluator files for testing + self.temp_dir = tempfile.mkdtemp() + + def tearDown(self): + """Clean up temporary files""" + # Clean up temp files + for file in os.listdir(self.temp_dir): + os.remove(os.path.join(self.temp_dir, file)) + os.rmdir(self.temp_dir) + + def _create_evaluator_file(self, filename: str, content: str) -> str: + """Helper to create temporary evaluator file""" + file_path = os.path.join(self.temp_dir, filename) + with open(file_path, 'w') as f: + f.write(content) + return file_path + + def test_cascade_validation_with_valid_evaluator(self): + """Test cascade validation with evaluator that has cascade functions""" + # Create evaluator with cascade functions + evaluator_content = ''' +def evaluate_stage1(program_path): + return {"stage1_score": 0.5} + +def evaluate_stage2(program_path): + return {"stage2_score": 0.7} + +def evaluate_stage3(program_path): + return {"stage3_score": 0.9} + +def evaluate(program_path): + return {"final_score": 1.0} +''' + evaluator_path = self._create_evaluator_file("valid_cascade.py", evaluator_content) + + # Configure for cascade evaluation + self.config.evaluator.cascade_evaluation = True + self.config.evaluator.evaluation_file = evaluator_path + + # Should not raise warnings for valid cascade evaluator + with patch('openevolve.evaluator.logger') as mock_logger: + evaluator = Evaluator(self.config.evaluator, None) + + # Should not have called warning + mock_logger.warning.assert_not_called() + + def test_cascade_validation_warning_for_missing_functions(self): + """Test cascade validation warns when cascade functions are missing""" + # Create evaluator without cascade functions + evaluator_content = ''' +def evaluate(program_path): + return {"score": 0.5} +''' + evaluator_path = self._create_evaluator_file("no_cascade.py", evaluator_content) + + # Configure for cascade evaluation + self.config.evaluator.cascade_evaluation = True + self.config.evaluator.evaluation_file = evaluator_path + + # Should warn about missing cascade functions + with patch('openevolve.evaluator.logger') as mock_logger: + evaluator = Evaluator(self.config.evaluator, None) + + # Should have warned about missing stage functions + mock_logger.warning.assert_called() + warning_call = mock_logger.warning.call_args[0][0] + self.assertIn("cascade_evaluation: true", warning_call) + self.assertIn("evaluate_stage1", warning_call) + + def test_cascade_validation_partial_functions(self): + """Test cascade validation with only some cascade functions""" + # Create evaluator with only stage1 + evaluator_content = ''' +def evaluate_stage1(program_path): + return {"stage1_score": 0.5} + +def evaluate(program_path): + return {"score": 0.5} +''' + evaluator_path = self._create_evaluator_file("partial_cascade.py", evaluator_content) + + # Configure for cascade evaluation + self.config.evaluator.cascade_evaluation = True + self.config.evaluator.evaluation_file = evaluator_path + + # Should not 
warn since stage1 exists (minimum requirement) + with patch('openevolve.evaluator.logger') as mock_logger: + evaluator = Evaluator(self.config.evaluator, None) + + # Should not warn since stage1 exists + mock_logger.warning.assert_not_called() + + def test_no_cascade_validation_when_disabled(self): + """Test no validation when cascade evaluation is disabled""" + # Create evaluator without cascade functions + evaluator_content = ''' +def evaluate(program_path): + return {"score": 0.5} +''' + evaluator_path = self._create_evaluator_file("no_cascade.py", evaluator_content) + + # Configure WITHOUT cascade evaluation + self.config.evaluator.cascade_evaluation = False + self.config.evaluator.evaluation_file = evaluator_path + + # Should not perform validation or warn + with patch('openevolve.evaluator.logger') as mock_logger: + evaluator = Evaluator(self.config.evaluator, None) + + # Should not warn when cascade evaluation is disabled + mock_logger.warning.assert_not_called() + + def test_direct_evaluate_supports_evaluation_result(self): + """Test that _direct_evaluate supports EvaluationResult returns""" + # Create evaluator that returns EvaluationResult + evaluator_content = ''' +from openevolve.database import EvaluationResult + +def evaluate(program_path): + return EvaluationResult( + metrics={"score": 0.8, "accuracy": 0.9}, + artifacts={"debug_info": "test data"} + ) +''' + evaluator_path = self._create_evaluator_file("result_evaluator.py", evaluator_content) + + self.config.evaluator.cascade_evaluation = False + self.config.evaluator.evaluation_file = evaluator_path + self.config.evaluator.timeout = 10 + + evaluator = Evaluator(self.config.evaluator, None) + + # Create a dummy program file + program_path = self._create_evaluator_file("test_program.py", "def test(): pass") + + # Mock the evaluation process + with patch('openevolve.evaluator.run_external_evaluator') as mock_run: + mock_run.return_value = EvaluationResult( + metrics={"score": 0.8, "accuracy": 0.9}, + artifacts={"debug_info": "test data"} + ) + + # Should handle EvaluationResult without issues + result = evaluator._direct_evaluate(program_path) + + # Should return the EvaluationResult as-is + self.assertIsInstance(result, EvaluationResult) + self.assertEqual(result.metrics["score"], 0.8) + self.assertEqual(result.artifacts["debug_info"], "test data") + + def test_direct_evaluate_supports_dict_result(self): + """Test that _direct_evaluate still supports dict returns""" + # Create evaluator that returns dict + evaluator_content = ''' +def evaluate(program_path): + return {"score": 0.7, "performance": 0.85} +''' + evaluator_path = self._create_evaluator_file("dict_evaluator.py", evaluator_content) + + self.config.evaluator.cascade_evaluation = False + self.config.evaluator.evaluation_file = evaluator_path + self.config.evaluator.timeout = 10 + + evaluator = Evaluator(self.config.evaluator, None) + + # Create a dummy program file + program_path = self._create_evaluator_file("test_program.py", "def test(): pass") + + # Mock the evaluation process + with patch('openevolve.evaluator.run_external_evaluator') as mock_run: + mock_run.return_value = {"score": 0.7, "performance": 0.85} + + # Should handle dict result without issues + result = evaluator._direct_evaluate(program_path) + + # Should return the dict as-is + self.assertIsInstance(result, dict) + self.assertEqual(result["score"], 0.7) + self.assertEqual(result["performance"], 0.85) + + def test_cascade_validation_with_class_based_evaluator(self): + """Test cascade validation 
with class-based evaluator""" + # Create class-based evaluator + evaluator_content = ''' +class Evaluator: + def evaluate_stage1(self, program_path): + return {"stage1_score": 0.5} + + def evaluate(self, program_path): + return {"score": 0.5} + +# Module-level functions (what validation looks for) +def evaluate_stage1(program_path): + evaluator = Evaluator() + return evaluator.evaluate_stage1(program_path) + +def evaluate(program_path): + evaluator = Evaluator() + return evaluator.evaluate(program_path) +''' + evaluator_path = self._create_evaluator_file("class_cascade.py", evaluator_content) + + # Configure for cascade evaluation + self.config.evaluator.cascade_evaluation = True + self.config.evaluator.evaluation_file = evaluator_path + + # Should not warn since module-level functions exist + with patch('openevolve.evaluator.logger') as mock_logger: + evaluator = Evaluator(self.config.evaluator, None) + + mock_logger.warning.assert_not_called() + + def test_cascade_validation_with_syntax_error(self): + """Test cascade validation handles syntax errors gracefully""" + # Create evaluator with syntax error + evaluator_content = ''' +def evaluate_stage1(program_path) # Missing colon + return {"stage1_score": 0.5} +''' + evaluator_path = self._create_evaluator_file("syntax_error.py", evaluator_content) + + # Configure for cascade evaluation + self.config.evaluator.cascade_evaluation = True + self.config.evaluator.evaluation_file = evaluator_path + + # Should handle syntax error and still warn about cascade + with patch('openevolve.evaluator.logger') as mock_logger: + evaluator = Evaluator(self.config.evaluator, None) + + # Should have warned about missing functions (due to import failure) + mock_logger.warning.assert_called() + + def test_cascade_validation_nonexistent_file(self): + """Test cascade validation with nonexistent evaluator file""" + # Configure with nonexistent file + self.config.evaluator.cascade_evaluation = True + self.config.evaluator.evaluation_file = "/nonexistent/path.py" + + # Should handle missing file gracefully + with patch('openevolve.evaluator.logger') as mock_logger: + evaluator = Evaluator(self.config.evaluator, None) + + # Should have warned about missing functions (due to import failure) + mock_logger.warning.assert_called() + + def test_process_evaluation_result_with_artifacts(self): + """Test that _process_evaluation_result handles artifacts correctly""" + evaluator_path = self._create_evaluator_file("dummy.py", "def evaluate(p): pass") + + self.config.evaluator.evaluation_file = evaluator_path + evaluator = Evaluator(self.config.evaluator, None) + + # Test with EvaluationResult containing artifacts + eval_result = EvaluationResult( + metrics={"score": 0.9}, + artifacts={"log": "test log", "data": [1, 2, 3]} + ) + + metrics, artifacts = evaluator._process_evaluation_result(eval_result) + + self.assertEqual(metrics, {"score": 0.9}) + self.assertEqual(artifacts, {"log": "test log", "data": [1, 2, 3]}) + + def test_process_evaluation_result_with_dict(self): + """Test that _process_evaluation_result handles dict results correctly""" + evaluator_path = self._create_evaluator_file("dummy.py", "def evaluate(p): pass") + + self.config.evaluator.evaluation_file = evaluator_path + evaluator = Evaluator(self.config.evaluator, None) + + # Test with dict result + dict_result = {"score": 0.7, "accuracy": 0.8} + + metrics, artifacts = evaluator._process_evaluation_result(dict_result) + + self.assertEqual(metrics, {"score": 0.7, "accuracy": 0.8}) + self.assertEqual(artifacts, 
{}) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/test_database.py b/tests/test_database.py index bfa35040c..883538eb3 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -80,6 +80,194 @@ def test_sample(self): self.assertIsNotNone(parent) self.assertIn(parent.id, ["test1", "test2"]) + def test_island_operations_basic(self): + """Test basic island operations""" + # Test with default islands (should be 5 by default) + self.assertEqual(len(self.db.islands), 5) + + program = Program( + id="island_test", + code="def island_test(): pass", + language="python", + metrics={"score": 0.6}, + ) + + self.db.add(program) + + # Should be in island 0 + self.assertIn("island_test", self.db.islands[0]) + self.assertEqual(program.metadata.get("island"), 0) + + def test_multi_island_setup(self): + """Test database with multiple islands""" + # Create new database with multiple islands + config = Config() + config.database.in_memory = True + config.database.num_islands = 3 + multi_db = ProgramDatabase(config.database) + + self.assertEqual(len(multi_db.islands), 3) + self.assertEqual(len(multi_db.island_best_programs), 3) + + # Add programs to specific islands + for i in range(3): + program = Program( + id=f"test_island_{i}", + code=f"def test_{i}(): pass", + language="python", + metrics={"score": 0.5 + i * 0.1}, + ) + multi_db.add(program, target_island=i) + + # Verify assignment + self.assertIn(f"test_island_{i}", multi_db.islands[i]) + self.assertEqual(program.metadata.get("island"), i) + + def test_feature_coordinates_calculation(self): + """Test MAP-Elites feature coordinate calculation""" + program = Program( + id="feature_test", + code="def test(): pass", # Short code + language="python", + metrics={"score": 0.8}, + ) + + coords = self.db._calculate_feature_coords(program) + + # Should return list of coordinates + self.assertIsInstance(coords, list) + self.assertEqual(len(coords), len(self.db.config.feature_dimensions)) + + # All coordinates should be within valid range + for coord in coords: + self.assertGreaterEqual(coord, 0) + self.assertLess(coord, self.db.feature_bins) + + def test_feature_map_operations(self): + """Test feature map operations for MAP-Elites""" + program1 = Program( + id="map_test1", + code="def short(): pass", # Similar complexity + language="python", + metrics={"score": 0.5}, + ) + + program2 = Program( + id="map_test2", + code="def also_short(): pass", # Similar complexity + language="python", + metrics={"score": 0.8}, # Better score + ) + + self.db.add(program1) + self.db.add(program2) + + # Both programs might land in same cell due to similar features + # The better program should be kept in the feature map + feature_coords1 = self.db._calculate_feature_coords(program1) + feature_coords2 = self.db._calculate_feature_coords(program2) + + key1 = self.db._feature_coords_to_key(feature_coords1) + key2 = self.db._feature_coords_to_key(feature_coords2) + + if key1 == key2: # Same cell + # Better program should be in feature map + self.assertEqual(self.db.feature_map[key1], "map_test2") + else: # Different cells + # Both should be in feature map + self.assertEqual(self.db.feature_map[key1], "map_test1") + self.assertEqual(self.db.feature_map[key2], "map_test2") + + def test_get_top_programs_with_metrics(self): + """Test get_top_programs with specific metrics""" + program1 = Program( + id="metric_test1", + code="def test1(): pass", + language="python", + metrics={"accuracy": 0.9, "speed": 0.3}, + ) + + program2 = 
Program( + id="metric_test2", + code="def test2(): pass", + language="python", + metrics={"accuracy": 0.7, "speed": 0.8}, + ) + + self.db.add(program1) + self.db.add(program2) + + # Test sorting by specific metric + top_by_accuracy = self.db.get_top_programs(n=2, metric="accuracy") + self.assertEqual(top_by_accuracy[0].id, "metric_test1") # Higher accuracy + + top_by_speed = self.db.get_top_programs(n=2, metric="speed") + self.assertEqual(top_by_speed[0].id, "metric_test2") # Higher speed + + def test_archive_operations(self): + """Test archive functionality""" + # Add programs that should go into archive + for i in range(5): + program = Program( + id=f"archive_test_{i}", + code=f"def test_{i}(): return {i}", + language="python", + metrics={"score": i * 0.1}, + ) + self.db.add(program) + + # Archive should contain program IDs + self.assertGreater(len(self.db.archive), 0) + self.assertLessEqual(len(self.db.archive), self.db.config.archive_size) + + # Archive should contain program IDs that exist + for program_id in self.db.archive: + self.assertIn(program_id, self.db.programs) + + def test_best_program_tracking(self): + """Test absolute best program tracking""" + program1 = Program( + id="best_test1", + code="def test1(): pass", + language="python", + metrics={"combined_score": 0.6}, + ) + + program2 = Program( + id="best_test2", + code="def test2(): pass", + language="python", + metrics={"combined_score": 0.9}, + ) + + self.db.add(program1) + self.assertEqual(self.db.best_program_id, "best_test1") + + self.db.add(program2) + self.assertEqual(self.db.best_program_id, "best_test2") # Should update to better program + + def test_population_limit_enforcement(self): + """Test population size limit enforcement""" + # Set small population limit + original_limit = self.db.config.population_size + self.db.config.population_size = 3 + + # Add more programs than limit + for i in range(5): + program = Program( + id=f"limit_test_{i}", + code=f"def test_{i}(): pass", + language="python", + metrics={"score": i * 0.1}, + ) + self.db.add(program) + + # Population should be at or below limit + self.assertLessEqual(len(self.db.programs), 3) + + # Restore original limit + self.db.config.population_size = original_limit + if __name__ == "__main__": unittest.main() diff --git a/tests/test_island_migration.py b/tests/test_island_migration.py new file mode 100644 index 000000000..efde4e37b --- /dev/null +++ b/tests/test_island_migration.py @@ -0,0 +1,252 @@ +""" +Tests for island migration functionality in openevolve.database +""" + +import unittest +from openevolve.config import Config +from openevolve.database import Program, ProgramDatabase + + +class TestIslandMigration(unittest.TestCase): + """Tests for island migration in program database""" + + def setUp(self): + """Set up test database with multiple islands""" + config = Config() + config.database.in_memory = True + config.database.num_islands = 3 + config.database.migration_rate = 0.5 # 50% of programs migrate + config.database.migration_generations = 5 # Migrate every 5 generations + self.db = ProgramDatabase(config.database) + + def _create_test_program(self, program_id: str, score: float, island: int) -> Program: + """Helper to create a test program""" + program = Program( + id=program_id, + code=f"def func_{program_id}(): return {score}", + language="python", + metrics={"score": score, "combined_score": score}, + metadata={"island": island} + ) + return program + + def test_initial_island_setup(self): + """Test that islands are properly 
initialized""" + self.assertEqual(len(self.db.islands), 3) + self.assertEqual(len(self.db.island_best_programs), 3) + self.assertEqual(len(self.db.island_generations), 3) + + # All islands should be empty initially + for island in self.db.islands: + self.assertEqual(len(island), 0) + + # All island best programs should be None initially + for best_id in self.db.island_best_programs: + self.assertIsNone(best_id) + + def test_program_island_assignment(self): + """Test that programs are assigned to correct islands""" + # Add programs to specific islands + program1 = self._create_test_program("test1", 0.5, 0) + program2 = self._create_test_program("test2", 0.7, 1) + program3 = self._create_test_program("test3", 0.3, 2) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=1) + self.db.add(program3, target_island=2) + + # Verify island assignments + self.assertIn("test1", self.db.islands[0]) + self.assertIn("test2", self.db.islands[1]) + self.assertIn("test3", self.db.islands[2]) + + # Verify metadata + self.assertEqual(self.db.programs["test1"].metadata["island"], 0) + self.assertEqual(self.db.programs["test2"].metadata["island"], 1) + self.assertEqual(self.db.programs["test3"].metadata["island"], 2) + + def test_should_migrate_logic(self): + """Test the migration timing logic""" + # Initially should not migrate (no generations passed) + self.assertFalse(self.db.should_migrate()) + + # Advance island generations + self.db.island_generations = [5, 6, 7] # All above threshold + self.assertTrue(self.db.should_migrate()) + + # Test with mixed generations + self.db.island_generations = [3, 6, 2] # Only one above threshold + self.assertFalse(self.db.should_migrate()) + + def test_migration_ring_topology(self): + """Test that migration follows ring topology""" + # Add programs to islands 0 and 1 + program1 = self._create_test_program("test1", 0.8, 0) + program2 = self._create_test_program("test2", 0.6, 1) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=1) + + # Set up for migration + self.db.island_generations = [6, 6, 6] # Trigger migration + + initial_program_count = len(self.db.programs) + + # Perform migration + self.db.migrate_programs() + + # Should have created migrant copies + self.assertGreater(len(self.db.programs), initial_program_count) + + # Check that migrants were created with proper naming + migrant_ids = [pid for pid in self.db.programs.keys() if "_migrant_" in pid] + self.assertGreater(len(migrant_ids), 0) + + # Verify ring topology: island 0 -> islands 1,2; island 1 -> islands 2,0 + island_0_migrants = [pid for pid in migrant_ids if "test1_migrant_" in pid] + island_1_migrants = [pid for pid in migrant_ids if "test2_migrant_" in pid] + + # test1 should migrate to islands 1 and 2 + self.assertTrue(any("_1" in pid for pid in island_0_migrants)) + self.assertTrue(any("_2" in pid for pid in island_0_migrants)) + + # test2 should migrate to islands 2 and 0 + self.assertTrue(any("_2" in pid for pid in island_1_migrants)) + self.assertTrue(any("_0" in pid for pid in island_1_migrants)) + + def test_migration_rate_respected(self): + """Test that migration rate is properly applied""" + # Add multiple programs to island 0 + programs = [] + for i in range(10): + program = self._create_test_program(f"test{i}", 0.5 + i * 0.05, 0) + programs.append(program) + self.db.add(program, target_island=0) + + # Set up for migration + self.db.island_generations = [6, 6, 6] + + initial_count = len(self.db.programs) + + # Perform migration + 
self.db.migrate_programs() + + # Calculate expected migrants + # With 50% migration rate and 10 programs, expect 5 migrants + # Each migrant goes to 2 target islands, so 10 total new programs + expected_new_programs = 5 * 2 # 5 migrants * 2 target islands each + actual_new_programs = len(self.db.programs) - initial_count + + self.assertEqual(actual_new_programs, expected_new_programs) + + def test_migration_preserves_best_programs(self): + """Test that migration selects the best programs for migration""" + # Add programs with different scores to island 0 + program1 = self._create_test_program("low_score", 0.2, 0) + program2 = self._create_test_program("high_score", 0.9, 0) + program3 = self._create_test_program("med_score", 0.5, 0) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=0) + self.db.add(program3, target_island=0) + + # Set up for migration + self.db.island_generations = [6, 6, 6] + + # Perform migration + self.db.migrate_programs() + + # Check that the high-score program was selected for migration + migrant_ids = [pid for pid in self.db.programs.keys() if "_migrant_" in pid] + high_score_migrants = [pid for pid in migrant_ids if "high_score_migrant_" in pid] + + self.assertGreater(len(high_score_migrants), 0) + + def test_migration_updates_generations(self): + """Test that migration updates the last migration generation""" + # Add a program and set up for migration + program = self._create_test_program("test1", 0.5, 0) + self.db.add(program, target_island=0) + + self.db.island_generations = [6, 7, 8] + initial_migration_gen = self.db.last_migration_generation + + # Perform migration + self.db.migrate_programs() + + # Should update to max of island generations + self.assertEqual(self.db.last_migration_generation, 8) + self.assertGreater(self.db.last_migration_generation, initial_migration_gen) + + def test_migration_with_empty_islands(self): + """Test that migration handles empty islands gracefully""" + # Add program only to island 0, leave others empty + program = self._create_test_program("test1", 0.5, 0) + self.db.add(program, target_island=0) + + # Set up for migration + self.db.island_generations = [6, 6, 6] + + # Should not crash with empty islands + try: + self.db.migrate_programs() + except Exception as e: + self.fail(f"Migration with empty islands should not crash: {e}") + + def test_migration_creates_proper_copies(self): + """Test that migration creates proper program copies""" + program = self._create_test_program("original", 0.7, 0) + self.db.add(program, target_island=0) + + # Set up for migration + self.db.island_generations = [6, 6, 6] + + # Perform migration + self.db.migrate_programs() + + # Find migrant copies + migrant_ids = [pid for pid in self.db.programs.keys() if "original_migrant_" in pid] + self.assertGreater(len(migrant_ids), 0) + + # Check migrant properties + for migrant_id in migrant_ids: + migrant = self.db.programs[migrant_id] + + # Should have same code and metrics + self.assertEqual(migrant.code, program.code) + self.assertEqual(migrant.metrics, program.metrics) + + # Should have proper parent reference + self.assertEqual(migrant.parent_id, "original") + + # Should be marked as migrant + self.assertTrue(migrant.metadata.get("migrant", False)) + + # Should be in correct target island + target_island = migrant.metadata["island"] + self.assertIn(migrant_id, self.db.islands[target_island]) + + def test_no_migration_with_single_island(self): + """Test that migration is skipped with single island""" + # Create database 
with single island + config = Config() + config.database.in_memory = True + config.database.num_islands = 1 + single_island_db = ProgramDatabase(config.database) + + program = self._create_test_program("test1", 0.5, 0) + single_island_db.add(program, target_island=0) + + single_island_db.island_generations = [6] + + initial_count = len(single_island_db.programs) + + # Should not perform migration + single_island_db.migrate_programs() + + # Program count should remain the same + self.assertEqual(len(single_island_db.programs), initial_count) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/test_island_tracking.py b/tests/test_island_tracking.py new file mode 100644 index 000000000..28723da1f --- /dev/null +++ b/tests/test_island_tracking.py @@ -0,0 +1,266 @@ +""" +Tests for island best program tracking functionality in openevolve.database +""" + +import unittest +from openevolve.config import Config +from openevolve.database import Program, ProgramDatabase + + +class TestIslandTracking(unittest.TestCase): + """Tests for island best program tracking in program database""" + + def setUp(self): + """Set up test database with multiple islands""" + config = Config() + config.database.in_memory = True + config.database.num_islands = 3 + self.db = ProgramDatabase(config.database) + + def _create_test_program(self, program_id: str, score: float, island: int) -> Program: + """Helper to create a test program""" + program = Program( + id=program_id, + code=f"def func_{program_id}(): return {score}", + language="python", + metrics={"score": score, "combined_score": score}, + metadata={"island": island} + ) + return program + + def test_initial_island_best_tracking(self): + """Test initial state of island best program tracking""" + # Initially all island best programs should be None + self.assertEqual(len(self.db.island_best_programs), 3) + for best_id in self.db.island_best_programs: + self.assertIsNone(best_id) + + def test_first_program_becomes_island_best(self): + """Test that the first program added to an island becomes the best""" + program = self._create_test_program("first", 0.5, 0) + self.db.add(program, target_island=0) + + # Should become the best program for island 0 + self.assertEqual(self.db.island_best_programs[0], "first") + + # Other islands should still have None + self.assertIsNone(self.db.island_best_programs[1]) + self.assertIsNone(self.db.island_best_programs[2]) + + def test_better_program_updates_island_best(self): + """Test that a better program replaces the island best""" + # Add initial program + program1 = self._create_test_program("mediocre", 0.5, 0) + self.db.add(program1, target_island=0) + self.assertEqual(self.db.island_best_programs[0], "mediocre") + + # Add better program + program2 = self._create_test_program("better", 0.8, 0) + self.db.add(program2, target_island=0) + self.assertEqual(self.db.island_best_programs[0], "better") + + def test_worse_program_does_not_update_island_best(self): + """Test that a worse program does not replace the island best""" + # Add good program + program1 = self._create_test_program("good", 0.8, 0) + self.db.add(program1, target_island=0) + self.assertEqual(self.db.island_best_programs[0], "good") + + # Add worse program + program2 = self._create_test_program("worse", 0.3, 0) + self.db.add(program2, target_island=0) + + # Should still be the good program + self.assertEqual(self.db.island_best_programs[0], "good") + + def test_island_isolation_in_best_tracking(self): + """Test that island 
best tracking is isolated between islands""" + # Add programs to different islands + program1 = self._create_test_program("island0_best", 0.9, 0) + program2 = self._create_test_program("island1_best", 0.7, 1) + program3 = self._create_test_program("island2_best", 0.5, 2) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=1) + self.db.add(program3, target_island=2) + + # Each island should track its own best + self.assertEqual(self.db.island_best_programs[0], "island0_best") + self.assertEqual(self.db.island_best_programs[1], "island1_best") + self.assertEqual(self.db.island_best_programs[2], "island2_best") + + def test_migration_updates_island_best(self): + """Test that migration can update island best programs""" + # Add program to island 0 + original = self._create_test_program("original", 0.6, 0) + self.db.add(original, target_island=0) + + # Island 1 starts empty + self.assertIsNone(self.db.island_best_programs[1]) + + # Manually create a migrant to island 1 (simulating migration) + migrant = Program( + id="original_migrant_1", + code=original.code, + language=original.language, + parent_id=original.id, + generation=original.generation, + metrics=original.metrics.copy(), + metadata={"island": 1, "migrant": True} + ) + + # Add migrant to island 1 + self.db.add(migrant, target_island=1) + + # Should become best for island 1 + self.assertEqual(self.db.island_best_programs[1], "original_migrant_1") + + def test_get_top_programs_island_specific(self): + """Test getting top programs from a specific island""" + # Add programs to island 0 + program1 = self._create_test_program("prog1", 0.9, 0) + program2 = self._create_test_program("prog2", 0.7, 0) + program3 = self._create_test_program("prog3", 0.5, 0) + + # Add programs to island 1 + program4 = self._create_test_program("prog4", 0.8, 1) + program5 = self._create_test_program("prog5", 0.6, 1) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=0) + self.db.add(program3, target_island=0) + self.db.add(program4, target_island=1) + self.db.add(program5, target_island=1) + + # Get top programs from island 0 + island0_top = self.db.get_top_programs(n=2, island_idx=0) + self.assertEqual(len(island0_top), 2) + self.assertEqual(island0_top[0].id, "prog1") # Highest score + self.assertEqual(island0_top[1].id, "prog2") # Second highest + + # Get top programs from island 1 + island1_top = self.db.get_top_programs(n=2, island_idx=1) + self.assertEqual(len(island1_top), 2) + self.assertEqual(island1_top[0].id, "prog4") # Highest score in island 1 + self.assertEqual(island1_top[1].id, "prog5") # Second highest in island 1 + + def test_island_best_with_combined_score(self): + """Test island best tracking with combined_score metric""" + # Add programs with combined_score + program1 = Program( + id="test1", + code="def test1(): pass", + language="python", + metrics={"score": 0.5, "other": 0.3, "combined_score": 0.4}, + metadata={"island": 0} + ) + + program2 = Program( + id="test2", + code="def test2(): pass", + language="python", + metrics={"score": 0.3, "other": 0.7, "combined_score": 0.5}, + metadata={"island": 0} + ) + + self.db.add(program1, target_island=0) + self.assertEqual(self.db.island_best_programs[0], "test1") + + # program2 has higher combined_score, should become best + self.db.add(program2, target_island=0) + self.assertEqual(self.db.island_best_programs[0], "test2") + + def test_island_best_with_missing_program(self): + """Test island best tracking when best program is removed""" + 
program = self._create_test_program("to_remove", 0.8, 0) + self.db.add(program, target_island=0) + self.assertEqual(self.db.island_best_programs[0], "to_remove") + + # Manually remove the program (simulating cleanup) + del self.db.programs["to_remove"] + self.db.islands[0].remove("to_remove") + + # Add a new program - should detect stale reference and update + new_program = self._create_test_program("new", 0.6, 0) + self.db.add(new_program, target_island=0) + + # Should update the best program (the old one is gone) + self.assertEqual(self.db.island_best_programs[0], "new") + + def test_sample_inspirations_from_island(self): + """Test that inspiration sampling respects island boundaries""" + # Add programs to island 0 + program1 = self._create_test_program("island0_prog1", 0.9, 0) + program2 = self._create_test_program("island0_prog2", 0.7, 0) + + # Add programs to island 1 + program3 = self._create_test_program("island1_prog1", 0.8, 1) + program4 = self._create_test_program("island1_prog2", 0.6, 1) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=0) + self.db.add(program3, target_island=1) + self.db.add(program4, target_island=1) + + # Sample from island 0 program + inspirations = self.db._sample_inspirations(program1, n=5) + + # All inspirations should be from island 0 + for inspiration in inspirations: + island = inspiration.metadata.get("island") + self.assertEqual(island, 0, f"Program {inspiration.id} should be from island 0, got {island}") + + def test_island_status_logging(self): + """Test island status logging functionality""" + # Add programs to different islands + program1 = self._create_test_program("p1", 0.9, 0) + program2 = self._create_test_program("p2", 0.7, 1) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=1) + + # Should not crash when logging status + try: + self.db.log_island_status() + except Exception as e: + self.fail(f"Island status logging should not crash: {e}") + + def test_island_best_persistence(self): + """Test that island best programs are maintained across operations""" + # Add programs to islands + program1 = self._create_test_program("best0", 0.9, 0) + program2 = self._create_test_program("best1", 0.8, 1) + + self.db.add(program1, target_island=0) + self.db.add(program2, target_island=1) + + # Verify initial state + self.assertEqual(self.db.island_best_programs[0], "best0") + self.assertEqual(self.db.island_best_programs[1], "best1") + + # Add more programs that are not better + program3 = self._create_test_program("worse0", 0.5, 0) + program4 = self._create_test_program("worse1", 0.4, 1) + + self.db.add(program3, target_island=0) + self.db.add(program4, target_island=1) + + # Best should remain unchanged + self.assertEqual(self.db.island_best_programs[0], "best0") + self.assertEqual(self.db.island_best_programs[1], "best1") + + def test_invalid_island_index_handling(self): + """Test handling of invalid island indices""" + # Test with island index out of bounds + with self.assertRaises(IndexError): + self.db.get_top_programs(n=5, island_idx=10) + + def test_empty_island_top_programs(self): + """Test getting top programs from empty island""" + # Island 0 is empty initially + top_programs = self.db.get_top_programs(n=5, island_idx=0) + self.assertEqual(len(top_programs), 0) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file