Commit 68665a5

Merge pull request #140 from codelion/fix-islands-evolution
Fix islands evolution
2 parents 7d0178e + a4a3847 commit 68665a5

File tree

10 files changed: 1361 additions & 70 deletions


examples/rust_adaptive_sort/config.yaml

Lines changed: 2 additions & 6 deletions
@@ -49,9 +49,5 @@ evaluator:
   timeout: 60 # Rust compilation can take time
   parallel_evaluations: 3
 
-  # Use cascade evaluation for performance testing
-  cascade_evaluation: true
-  cascade_thresholds:
-    - 0.5 # Compilation success and basic correctness
-    - 0.7 # Good performance
-    - 0.85 # Excellent adaptability
+  # Direct evaluation - evaluator doesn't implement cascade functions
+  cascade_evaluation: false
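
Note: the Rust example's evaluator defines only a single evaluate function, so cascade evaluation is switched off here rather than being left to silently fall back. For contrast, a minimal sketch of what a cascade-capable evaluator module would expose — the stage function names are the ones the new validator (below) checks for; the metric names and scores are purely illustrative, not taken from the actual rust_adaptive_sort evaluator:

# Hypothetical sketch of a cascade-capable evaluator module.
def evaluate_stage1(program_path: str) -> dict:
    # Cheap gate: compile and run basic correctness checks.
    return {"compile_ok": 1.0, "basic_correctness": 0.9}

def evaluate_stage2(program_path: str) -> dict:
    # More expensive: benchmark performance on representative inputs.
    return {"performance": 0.75}

def evaluate_stage3(program_path: str) -> dict:
    # Most expensive: measure adaptability across input distributions.
    return {"adaptability": 0.85}

def evaluate(program_path: str) -> dict:
    # Direct entry point, used when cascade_evaluation is false.
    return {**evaluate_stage1(program_path), **evaluate_stage2(program_path)}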

openevolve/database.py

Lines changed: 279 additions & 47 deletions
Large diffs are not rendered by default.

openevolve/evaluator.py

Lines changed: 65 additions & 11 deletions
@@ -89,10 +89,42 @@ def _load_evaluation_function(self) -> None:
 
             self.evaluate_function = module.evaluate
             logger.info(f"Successfully loaded evaluation function from {self.evaluation_file}")
+
+            # Validate cascade configuration
+            self._validate_cascade_configuration(module)
         except Exception as e:
             logger.error(f"Error loading evaluation function: {str(e)}")
             raise
 
+    def _validate_cascade_configuration(self, module) -> None:
+        """
+        Validate cascade evaluation configuration and warn about potential issues
+
+        Args:
+            module: The loaded evaluation module
+        """
+        if self.config.cascade_evaluation:
+            # Check if cascade functions exist
+            has_stage1 = hasattr(module, "evaluate_stage1")
+            has_stage2 = hasattr(module, "evaluate_stage2")
+            has_stage3 = hasattr(module, "evaluate_stage3")
+
+            if not has_stage1:
+                logger.warning(
+                    f"Configuration has 'cascade_evaluation: true' but evaluator "
+                    f"'{self.evaluation_file}' does not define 'evaluate_stage1' function. "
+                    f"This will fall back to direct evaluation, making the cascade setting useless. "
+                    f"Consider setting 'cascade_evaluation: false' or implementing cascade functions."
+                )
+            elif not (has_stage2 or has_stage3):
+                logger.warning(
+                    f"Evaluator '{self.evaluation_file}' defines 'evaluate_stage1' but no additional "
+                    f"cascade stages (evaluate_stage2, evaluate_stage3). Consider implementing "
+                    f"multi-stage evaluation for better cascade benefits."
+                )
+            else:
+                logger.debug(f"Cascade evaluation properly configured with available stage functions")
+
     async def evaluate_program(
         self,
         program_code: str,
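
The validation is purely advisory: it logs a warning or debug message but never raises, so a misconfigured run still proceeds via direct evaluation. A standalone sketch of the same branching, returning labels instead of logging so the outcomes are easy to test (check_cascade_support is a hypothetical name, not part of openevolve):

import types

def check_cascade_support(module: types.ModuleType, cascade_enabled: bool) -> str:
    # Mirrors _validate_cascade_configuration's decision tree.
    if not cascade_enabled:
        return "direct"
    if not hasattr(module, "evaluate_stage1"):
        return "falls_back_to_direct"  # first warning case
    if not (hasattr(module, "evaluate_stage2") or hasattr(module, "evaluate_stage3")):
        return "single_stage_only"     # second warning case
    return "fully_configured"          # debug case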
@@ -273,15 +305,15 @@ def get_pending_artifacts(self, program_id: str) -> Optional[Dict[str, Union[str
         """
         return self._pending_artifacts.pop(program_id, None)
 
-    async def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
+    async def _direct_evaluate(self, program_path: str) -> Union[Dict[str, float], EvaluationResult]:
         """
         Directly evaluate a program using the evaluation function with timeout
 
         Args:
             program_path: Path to the program file
 
         Returns:
-            Dictionary of metric name to score
+            Dictionary of metrics or EvaluationResult with metrics and artifacts
 
         Raises:
             asyncio.TimeoutError: If evaluation exceeds timeout
@@ -296,11 +328,8 @@ async def run_evaluation():
         # Run the evaluation with timeout - let exceptions bubble up for retry handling
         result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)
 
-        # Validate result
-        if not isinstance(result, dict):
-            logger.warning(f"Evaluation returned non-dictionary result: {result}")
-            return {"error": 0.0}
-
+        # Return result as-is to be processed by _process_evaluation_result
+        # This supports both dict and EvaluationResult returns, just like _cascade_evaluate
         return result
 
     async def _cascade_evaluate(
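
With the isinstance check gone, _direct_evaluate can hand back either a plain metrics dict or an EvaluationResult; normalization happens downstream in _process_evaluation_result, whose diff is not shown here. A minimal sketch of what that normalization plausibly looks like, assuming an EvaluationResult dataclass with metrics and artifacts fields (the real helper in openevolve may differ):

from dataclasses import dataclass, field
from typing import Dict, Union

@dataclass
class EvaluationResult:
    metrics: Dict[str, float] = field(default_factory=dict)
    artifacts: dict = field(default_factory=dict)

def process_evaluation_result(result: Union[Dict[str, float], EvaluationResult]) -> EvaluationResult:
    # A bare dict is treated as metrics-only; an EvaluationResult
    # passes through with its artifacts intact.
    if isinstance(result, dict):
        return EvaluationResult(metrics=result)
    return result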
@@ -354,13 +383,14 @@ async def run_stage1():
             )
         except Exception as e:
             logger.error(f"Error in stage 1 evaluation: {str(e)}")
-            # Capture stage 1 failure as artifacts
+            # Capture stage 1 failure with enhanced context
+            error_context = self._create_cascade_error_context("stage1", e)
             return EvaluationResult(
                 metrics={"stage1_passed": 0.0, "error": 0.0},
                 artifacts={
                     "stderr": str(e),
                     "traceback": traceback.format_exc(),
-                    "failure_stage": "stage1",
+                    **error_context,
                 },
             )
 
@@ -481,13 +511,14 @@ async def run_stage3():
 
         except Exception as e:
             logger.error(f"Error in cascade evaluation: {str(e)}")
-            # Return proper cascade failure result instead of re-raising
+            # Return proper cascade failure result with enhanced context
+            error_context = self._create_cascade_error_context("cascade_setup", e)
             return EvaluationResult(
                 metrics={"stage1_passed": 0.0, "error": 0.0},
                 artifacts={
                     "stderr": str(e),
                     "traceback": traceback.format_exc(),
-                    "failure_stage": "cascade_setup",
+                    **error_context,
                 },
             )
 
@@ -582,6 +613,29 @@ async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[s
             traceback.print_exc()
             return {}
 
+    def _create_cascade_error_context(self, stage: str, error: Exception) -> dict:
+        """
+        Create rich error context for cascade failures
+
+        Args:
+            stage: The stage where the error occurred
+            error: The exception that was raised
+
+        Returns:
+            Dictionary with enhanced error context
+        """
+        import time
+        return {
+            "failure_stage": stage,
+            "error_type": type(error).__name__,
+            "error_message": str(error),
+            "timestamp": time.time(),
+            "cascade_config": self.config.cascade_evaluation,
+            "cascade_thresholds": getattr(self.config, 'cascade_thresholds', []),
+            "timeout_config": self.config.timeout,
+            "evaluation_file": self.evaluation_file,
+        }
+
     def _passes_threshold(self, metrics: Dict[str, float], threshold: float) -> bool:
         """
         Check if metrics pass a threshold
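
Put together, a stage 1 failure now surfaces an EvaluationResult whose artifacts carry the enriched context: the stderr/traceback keys from the call site plus everything from _create_cascade_error_context. An invented example of the resulting artifacts dict (every value below is illustrative):

artifacts = {
    "stderr": "division by zero",
    "traceback": "Traceback (most recent call last): ...",
    "failure_stage": "stage1",
    "error_type": "ZeroDivisionError",
    "error_message": "division by zero",
    "timestamp": 1718000000.0,           # time.time() at failure
    "cascade_config": True,              # config.cascade_evaluation
    "cascade_thresholds": [0.5, 0.75, 0.9],
    "timeout_config": 300,
    "evaluation_file": "path/to/evaluator.py",
}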

openevolve/iteration.py

Lines changed: 6 additions & 4 deletions
@@ -53,16 +53,18 @@ async def run_iteration_with_shared_db(
     # Get artifacts for the parent program if available
     parent_artifacts = database.get_artifacts(parent.id)
 
-    # Get actual top programs for prompt context (separate from inspirations)
-    actual_top_programs = database.get_top_programs(5)
+    # Get island-specific top programs for prompt context (maintain island isolation)
+    parent_island = parent.metadata.get("island", database.current_island)
+    island_top_programs = database.get_top_programs(5, island_idx=parent_island)
+    island_previous_programs = database.get_top_programs(3, island_idx=parent_island)
 
     # Build prompt
     prompt = prompt_sampler.build_prompt(
         current_program=parent.code,
         parent_program=parent.code,
         program_metrics=parent.metrics,
-        previous_programs=[p.to_dict() for p in database.get_top_programs(3)],
-        top_programs=[p.to_dict() for p in actual_top_programs],
+        previous_programs=[p.to_dict() for p in island_previous_programs],
+        top_programs=[p.to_dict() for p in island_top_programs],
         inspirations=[p.to_dict() for p in inspirations],
         language=config.language,
         evolution_round=iteration,
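
This is the heart of the islands fix: prompt context is now drawn only from the parent's own island rather than the global top programs, preserving the independence that makes island-based evolution useful. The matching database-side change is in the large openevolve/database.py diff above (not rendered). A rough, assumption-laden sketch of what an island-aware get_top_programs could look like — the data-structure names here are guesses, not the actual implementation:

# Assumes `islands` maps an island index to a set of program ids and
# each program carries a metrics dict with a combined score.
def get_top_programs(programs, islands, n=5, island_idx=None):
    if island_idx is None:
        candidates = list(programs.values())  # global view
    else:
        candidates = [programs[pid] for pid in islands[island_idx]]  # island-local view
    # Rank best-first; programs missing the score sort last.
    return sorted(
        candidates,
        key=lambda p: p.metrics.get("combined_score", 0.0),
        reverse=True,
    )[:n]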

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "openevolve"
-version = "0.0.14"
+version = "0.0.15"
 description = "Open-source implementation of AlphaEvolve"
 readme = "README.md"
 requires-python = ">=3.9"

setup.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 setup(
     name="openevolve",
-    version="0.0.14",
+    version="0.0.15",
     packages=find_packages(),
     include_package_data=True,
 )
