Skip to content

Commit fb70ffc

Browse files
committed
Use evaluation objects in place of files
1 parent ddf42e2 commit fb70ffc

File tree

3 files changed

+48
-30
lines changed

3 files changed

+48
-30
lines changed

openevolve/controller.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from openevolve.config import Config, load_config
1515
from openevolve.database import Program, ProgramDatabase
16-
from openevolve.evaluator import Evaluator
16+
from openevolve.evaluator import Evaluator, EvaluationObject
1717
from openevolve.llm.ensemble import LLMEnsemble
1818
from openevolve.prompt.sampler import PromptSampler
1919
from openevolve.process_parallel import ProcessParallelController
@@ -74,6 +74,7 @@ def __init__(
7474
self,
7575
initial_program_path: str,
7676
evaluation_file: str,
77+
evaluation_object: Optional[EvaluationObject] = None,
7778
config_path: Optional[str] = None,
7879
config: Optional[Config] = None,
7980
output_dir: Optional[str] = None,
@@ -154,11 +155,13 @@ def __init__(
154155
self.evaluator = Evaluator(
155156
self.config.evaluator,
156157
evaluation_file,
158+
evaluation_object,
157159
self.llm_evaluator_ensemble,
158160
self.evaluator_prompt_sampler,
159161
database=self.database,
160162
)
161163
self.evaluation_file = evaluation_file
164+
self.evaluation_object = self.evaluator.evaluation_object
162165

163166
logger.info(f"Initialized OpenEvolve with {initial_program_path}")
164167

@@ -275,7 +278,7 @@ async def run(
275278
# Initialize improved parallel processing
276279
try:
277280
self.parallel_controller = ProcessParallelController(
278-
self.config, self.evaluation_file, self.database
281+
self.config, self.evaluation_file, self.evaluation_object, self.database
279282
)
280283

281284
# Set up signal handlers for graceful shutdown

openevolve/evaluator.py

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
import time
1414
import traceback
1515
import uuid
16+
import warnings
1617
from pathlib import Path
17-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
18+
from typing import Any, Dict, List, Optional, Tuple, Union, Protocol, cast
1819
import traceback
1920

2021
from openevolve.config import EvaluatorConfig
@@ -29,6 +30,22 @@
2930
logger = logging.getLogger(__name__)
3031

3132

33+
class EvaluationObject(Protocol):
34+
def evaluate(self, program_path: str) -> EvaluationResult:
35+
...
36+
37+
38+
class CascadeEvaluationObject(Protocol):
39+
def evaluate_stage1(self, program_path: str) -> EvaluationResult:
40+
...
41+
42+
def evaluate_stage2(self, program_path: str) -> EvaluationResult:
43+
...
44+
45+
def evaluate_stage3(self, program_path: str) -> EvaluationResult:
46+
...
47+
48+
3249
class Evaluator:
3350
"""
3451
Evaluates programs and assigns scores
@@ -41,21 +58,26 @@ def __init__(
4158
self,
4259
config: EvaluatorConfig,
4360
evaluation_file: str,
61+
evaluation_object: Optional[EvaluationObject] = None,
4462
llm_ensemble: Optional[LLMEnsemble] = None,
4563
prompt_sampler: Optional[PromptSampler] = None,
4664
database: Optional[ProgramDatabase] = None,
4765
):
66+
if evaluation_file and evaluation_object:
67+
warnings.warn("Both evaluation_file and evaluation_object provided - evaluation_object overrides evaluation_file")
4868
self.config = config
4969
self.evaluation_file = evaluation_file
70+
self.evaluation_object = evaluation_object
5071
self.llm_ensemble = llm_ensemble
5172
self.prompt_sampler = prompt_sampler
5273
self.database = database
5374

5475
# Create a task pool for parallel evaluation
5576
self.task_pool = TaskPool(max_concurrency=config.parallel_evaluations)
5677

57-
# Set up evaluation function if file exists
58-
self._load_evaluation_function()
78+
if self.evaluation_object is None:
79+
# Set up evaluation module if file exists
80+
self._load_evaluation_function()
5981

6082
# Pending artifacts storage for programs
6183
self._pending_artifacts: Dict[str, Dict[str, Union[str, bytes]]] = {}
@@ -87,7 +109,7 @@ def _load_evaluation_function(self) -> None:
87109
f"Evaluation file {self.evaluation_file} does not contain an 'evaluate' function"
88110
)
89111

90-
self.evaluate_function = module.evaluate
112+
self.evaluation_object = module.evaluate
91113
logger.info(f"Successfully loaded evaluation function from {self.evaluation_file}")
92114

93115
# Validate cascade configuration
@@ -346,7 +368,7 @@ async def _direct_evaluate(
346368
# Create a coroutine that runs the evaluation function in an executor
347369
async def run_evaluation():
348370
loop = asyncio.get_event_loop()
349-
return await loop.run_in_executor(None, self.evaluate_function, program_path)
371+
return await loop.run_in_executor(None, self.evaluation_object.evaluate, program_path)
350372

351373
# Run the evaluation with timeout - let exceptions bubble up for retry handling
352374
result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)
@@ -367,31 +389,19 @@ async def _cascade_evaluate(
367389
Returns:
368390
Dictionary of metrics or EvaluationResult with metrics and artifacts
369391
"""
370-
# Import the evaluation module to get cascade functions if they exist
392+
# This cast just makes static type checkers happy; actual checking is still done using hasattr
393+
evaluation_object = cast(CascadeEvaluationObject, self.evaluation_object)
371394
try:
372-
# Add the evaluation file's directory to Python path so it can import local modules
373-
eval_dir = os.path.dirname(os.path.abspath(self.evaluation_file))
374-
if eval_dir not in sys.path:
375-
sys.path.insert(0, eval_dir)
376-
logger.debug(f"Added {eval_dir} to Python path for cascade evaluation")
377-
378-
spec = importlib.util.spec_from_file_location("evaluation_module", self.evaluation_file)
379-
if spec is None or spec.loader is None:
380-
return await self._direct_evaluate(program_path)
381-
382-
module = importlib.util.module_from_spec(spec)
383-
spec.loader.exec_module(module)
384-
385395
# Check if cascade functions exist
386-
if not hasattr(module, "evaluate_stage1"):
396+
if not hasattr(evaluation_object, "evaluate_stage1"):
387397
return await self._direct_evaluate(program_path)
388398

389399
# Run first stage with timeout
390400
try:
391401

392402
async def run_stage1():
393403
loop = asyncio.get_event_loop()
394-
return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
404+
return await loop.run_in_executor(None, evaluation_object.evaluate_stage1, program_path)
395405

396406
stage1_result = await asyncio.wait_for(run_stage1(), timeout=self.config.timeout)
397407
stage1_eval_result = self._process_evaluation_result(stage1_result)
@@ -424,15 +434,15 @@ async def run_stage1():
424434
return stage1_eval_result
425435

426436
# Check if second stage exists
427-
if not hasattr(module, "evaluate_stage2"):
437+
if not hasattr(evaluation_object, "evaluate_stage2"):
428438
return stage1_eval_result
429439

430440
# Run second stage with timeout
431441
try:
432442

433443
async def run_stage2():
434444
loop = asyncio.get_event_loop()
435-
return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
445+
return await loop.run_in_executor(None, evaluation_object.evaluate_stage2, program_path)
436446

437447
stage2_result = await asyncio.wait_for(run_stage2(), timeout=self.config.timeout)
438448
stage2_eval_result = self._process_evaluation_result(stage2_result)
@@ -486,15 +496,15 @@ async def run_stage2():
486496
return merged_result
487497

488498
# Check if third stage exists
489-
if not hasattr(module, "evaluate_stage3"):
499+
if not hasattr(evaluation_object, "evaluate_stage3"):
490500
return merged_result
491501

492502
# Run third stage with timeout
493503
try:
494504

495505
async def run_stage3():
496506
loop = asyncio.get_event_loop()
497-
return await loop.run_in_executor(None, module.evaluate_stage3, program_path)
507+
return await loop.run_in_executor(None, evaluation_object.evaluate_stage3, program_path)
498508

499509
stage3_result = await asyncio.wait_for(run_stage3(), timeout=self.config.timeout)
500510
stage3_eval_result = self._process_evaluation_result(stage3_result)

openevolve/process_parallel.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from openevolve.config import Config
1717
from openevolve.database import Program, ProgramDatabase
18+
from openevolve.evaluator import EvaluationObject
1819

1920
logger = logging.getLogger(__name__)
2021

@@ -33,10 +34,11 @@ class SerializableResult:
3334
error: Optional[str] = None
3435

3536

36-
def _worker_init(config_dict: dict, evaluation_file: str) -> None:
37+
def _worker_init(config_dict: dict, evaluation_file: str, evaluation_object: EvaluationObject) -> None:
3738
"""Initialize worker process with necessary components"""
3839
global _worker_config
3940
global _worker_evaluation_file
41+
global _worker_evaluation_object
4042
global _worker_evaluator
4143
global _worker_llm_ensemble
4244
global _worker_prompt_sampler
@@ -79,6 +81,7 @@ def _worker_init(config_dict: dict, evaluation_file: str) -> None:
7981
},
8082
)
8183
_worker_evaluation_file = evaluation_file
84+
_worker_evaluation_object = evaluation_object
8285

8386
# These will be lazily initialized on first use
8487
_worker_evaluator = None
@@ -115,6 +118,7 @@ def _lazy_init_worker_components():
115118
_worker_evaluator = Evaluator(
116119
_worker_config.evaluator,
117120
_worker_evaluation_file,
121+
_worker_evaluation_object,
118122
evaluator_llm,
119123
evaluator_prompt,
120124
database=None, # No shared database in worker
@@ -258,9 +262,10 @@ def _run_iteration_worker(
258262
class ProcessParallelController:
259263
"""Controller for process-based parallel evolution"""
260264

261-
def __init__(self, config: Config, evaluation_file: str, database: ProgramDatabase):
265+
def __init__(self, config: Config, evaluation_file: str, evaluation_object: EvaluationObject, database: ProgramDatabase):
262266
self.config = config
263267
self.evaluation_file = evaluation_file
268+
self.evaluation_object = evaluation_object
264269
self.database = database
265270

266271
self.executor: Optional[ProcessPoolExecutor] = None
@@ -310,7 +315,7 @@ def start(self) -> None:
310315
self.executor = ProcessPoolExecutor(
311316
max_workers=self.num_workers,
312317
initializer=_worker_init,
313-
initargs=(config_dict, self.evaluation_file),
318+
initargs=(config_dict, self.evaluation_file, self.evaluation_object),
314319
)
315320

316321
logger.info(f"Started process pool with {self.num_workers} processes")

0 commit comments

Comments (0)