Use evaluation objects in place of files

tonyxty · tonyxty · commit fb70ffcc6188 · 2025-08-11T01:27:12.000+08:00
diff --git a/openevolve/controller.py b/openevolve/controller.py
@@ -13,7 +13,7 @@
 
 from openevolve.config import Config, load_config
 from openevolve.database import Program, ProgramDatabase
-from openevolve.evaluator import Evaluator
+from openevolve.evaluator import Evaluator, EvaluationObject
 from openevolve.llm.ensemble import LLMEnsemble
 from openevolve.prompt.sampler import PromptSampler
 from openevolve.process_parallel import ProcessParallelController
@@ -74,6 +74,7 @@ def __init__(
         self,
         initial_program_path: str,
         evaluation_file: str,
+        evaluation_object: Optional[EvaluationObject] = None,
         config_path: Optional[str] = None,
         config: Optional[Config] = None,
         output_dir: Optional[str] = None,
@@ -154,11 +155,13 @@ def __init__(
         self.evaluator = Evaluator(
             self.config.evaluator,
             evaluation_file,
+            evaluation_object,
             self.llm_evaluator_ensemble,
             self.evaluator_prompt_sampler,
             database=self.database,
         )
         self.evaluation_file = evaluation_file
+        self.evaluation_object = evaluation_object  # forward only the caller's object; None lets each worker load evaluation_file itself (file-loaded modules cannot be pickled into initargs)
 
         logger.info(f"Initialized OpenEvolve with {initial_program_path}")
 
@@ -275,7 +278,7 @@ async def run(
         # Initialize improved parallel processing
         try:
             self.parallel_controller = ProcessParallelController(
-                self.config, self.evaluation_file, self.database
+                self.config, self.evaluation_file, self.evaluation_object, self.database
             )
 
             # Set up signal handlers for graceful shutdown
diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py
@@ -13,8 +13,9 @@
 import time
 import traceback
 import uuid
+import warnings
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union, Protocol, cast
 import traceback
 
 from openevolve.config import EvaluatorConfig
@@ -29,6 +30,22 @@
 logger = logging.getLogger(__name__)
 
 
+class EvaluationObject(Protocol):  # Structural type: any object that exposes evaluate(program_path)
+    def evaluate(self, program_path: str) -> EvaluationResult:  # Invoked by Evaluator._direct_evaluate with the program's path
+        ...
+
+
+class CascadeEvaluationObject(Protocol):  # Staged variant; Evaluator._cascade_evaluate probes each stage with hasattr()
+    def evaluate_stage1(self, program_path: str) -> EvaluationResult:  # First stage; when absent, evaluate() is used instead
+        ...
+
+    def evaluate_stage2(self, program_path: str) -> EvaluationResult:  # Optional; when absent, the stage-1 result is returned
+        ...
+
+    def evaluate_stage3(self, program_path: str) -> EvaluationResult:  # Optional; when absent, the merged earlier result is returned
+        ...
+
+
 class Evaluator:
     """
     Evaluates programs and assigns scores
@@ -41,21 +58,26 @@ def __init__(
         self,
         config: EvaluatorConfig,
         evaluation_file: str,
+        evaluation_object: Optional[EvaluationObject] = None,
         llm_ensemble: Optional[LLMEnsemble] = None,
         prompt_sampler: Optional[PromptSampler] = None,
         database: Optional[ProgramDatabase] = None,
     ):
+        if evaluation_file and evaluation_object:
+            warnings.warn("Both evaluation_file and evaluation_object provided - evaluation_object overrides evaluation_file")
         self.config = config
         self.evaluation_file = evaluation_file
+        self.evaluation_object = evaluation_object
         self.llm_ensemble = llm_ensemble
         self.prompt_sampler = prompt_sampler
         self.database = database
 
         # Create a task pool for parallel evaluation
         self.task_pool = TaskPool(max_concurrency=config.parallel_evaluations)
 
-        # Set up evaluation function if file exists
-        self._load_evaluation_function()
+        if self.evaluation_object is None:
+            # Set up evaluation module if file exists
+            self._load_evaluation_function()
 
         # Pending artifacts storage for programs
         self._pending_artifacts: Dict[str, Dict[str, Union[str, bytes]]] = {}
@@ -87,7 +109,7 @@ def _load_evaluation_function(self) -> None:
                     f"Evaluation file {self.evaluation_file} does not contain an 'evaluate' function"
                 )
 
-            self.evaluate_function = module.evaluate
+            self.evaluation_object = module  # store the module itself: .evaluate and .evaluate_stageN are looked up on this object
             logger.info(f"Successfully loaded evaluation function from {self.evaluation_file}")
 
             # Validate cascade configuration
@@ -346,7 +368,7 @@ async def _direct_evaluate(
         # Create a coroutine that runs the evaluation function in an executor
         async def run_evaluation():
             loop = asyncio.get_event_loop()
-            return await loop.run_in_executor(None, self.evaluate_function, program_path)
+            return await loop.run_in_executor(None, self.evaluation_object.evaluate, program_path)
 
         # Run the evaluation with timeout - let exceptions bubble up for retry handling
         result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)
@@ -367,31 +389,19 @@ async def _cascade_evaluate(
         Returns:
             Dictionary of metrics or EvaluationResult with metrics and artifacts
         """
-        # Import the evaluation module to get cascade functions if they exist
+        # This cast just makes static type checkers happy; actual checking is still done using hasattr
+        evaluation_object = cast(CascadeEvaluationObject, self.evaluation_object)
         try:
-            # Add the evaluation file's directory to Python path so it can import local modules
-            eval_dir = os.path.dirname(os.path.abspath(self.evaluation_file))
-            if eval_dir not in sys.path:
-                sys.path.insert(0, eval_dir)
-                logger.debug(f"Added {eval_dir} to Python path for cascade evaluation")
-
-            spec = importlib.util.spec_from_file_location("evaluation_module", self.evaluation_file)
-            if spec is None or spec.loader is None:
-                return await self._direct_evaluate(program_path)
-
-            module = importlib.util.module_from_spec(spec)
-            spec.loader.exec_module(module)
-
             # Check if cascade functions exist
-            if not hasattr(module, "evaluate_stage1"):
+            if not hasattr(evaluation_object, "evaluate_stage1"):
                 return await self._direct_evaluate(program_path)
 
             # Run first stage with timeout
             try:
 
                 async def run_stage1():
                     loop = asyncio.get_event_loop()
-                    return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
+                    return await loop.run_in_executor(None, evaluation_object.evaluate_stage1, program_path)
 
                 stage1_result = await asyncio.wait_for(run_stage1(), timeout=self.config.timeout)
                 stage1_eval_result = self._process_evaluation_result(stage1_result)
@@ -424,15 +434,15 @@ async def run_stage1():
                 return stage1_eval_result
 
             # Check if second stage exists
-            if not hasattr(module, "evaluate_stage2"):
+            if not hasattr(evaluation_object, "evaluate_stage2"):
                 return stage1_eval_result
 
             # Run second stage with timeout
             try:
 
                 async def run_stage2():
                     loop = asyncio.get_event_loop()
-                    return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
+                    return await loop.run_in_executor(None, evaluation_object.evaluate_stage2, program_path)
 
                 stage2_result = await asyncio.wait_for(run_stage2(), timeout=self.config.timeout)
                 stage2_eval_result = self._process_evaluation_result(stage2_result)
@@ -486,15 +496,15 @@ async def run_stage2():
                 return merged_result
 
             # Check if third stage exists
-            if not hasattr(module, "evaluate_stage3"):
+            if not hasattr(evaluation_object, "evaluate_stage3"):
                 return merged_result
 
             # Run third stage with timeout
             try:
 
                 async def run_stage3():
                     loop = asyncio.get_event_loop()
-                    return await loop.run_in_executor(None, module.evaluate_stage3, program_path)
+                    return await loop.run_in_executor(None, evaluation_object.evaluate_stage3, program_path)
 
                 stage3_result = await asyncio.wait_for(run_stage3(), timeout=self.config.timeout)
                 stage3_eval_result = self._process_evaluation_result(stage3_result)
diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py
@@ -15,6 +15,7 @@
 
 from openevolve.config import Config
 from openevolve.database import Program, ProgramDatabase
+from openevolve.evaluator import EvaluationObject
 
 logger = logging.getLogger(__name__)
 
@@ -33,10 +34,11 @@ class SerializableResult:
     error: Optional[str] = None
 
 
-def _worker_init(config_dict: dict, evaluation_file: str) -> None:
+def _worker_init(config_dict: dict, evaluation_file: str, evaluation_object: EvaluationObject) -> None:
     """Initialize worker process with necessary components"""
     global _worker_config
     global _worker_evaluation_file
+    global _worker_evaluation_object
     global _worker_evaluator
     global _worker_llm_ensemble
     global _worker_prompt_sampler
@@ -79,6 +81,7 @@ def _worker_init(config_dict: dict, evaluation_file: str) -> None:
         },
     )
     _worker_evaluation_file = evaluation_file
+    _worker_evaluation_object = evaluation_object
 
     # These will be lazily initialized on first use
     _worker_evaluator = None
@@ -115,6 +118,7 @@ def _lazy_init_worker_components():
         _worker_evaluator = Evaluator(
             _worker_config.evaluator,
             _worker_evaluation_file,
+            _worker_evaluation_object,
             evaluator_llm,
             evaluator_prompt,
             database=None,  # No shared database in worker
@@ -258,9 +262,10 @@ def _run_iteration_worker(
 class ProcessParallelController:
     """Controller for process-based parallel evolution"""
 
-    def __init__(self, config: Config, evaluation_file: str, database: ProgramDatabase):
+    def __init__(self, config: Config, evaluation_file: str, evaluation_object: EvaluationObject, database: ProgramDatabase):
         self.config = config
         self.evaluation_file = evaluation_file
+        self.evaluation_object = evaluation_object
         self.database = database
 
         self.executor: Optional[ProcessPoolExecutor] = None
@@ -310,7 +315,7 @@ def start(self) -> None:
         self.executor = ProcessPoolExecutor(
             max_workers=self.num_workers,
             initializer=_worker_init,
-            initargs=(config_dict, self.evaluation_file),
+            initargs=(config_dict, self.evaluation_file, self.evaluation_object),
         )
 
         logger.info(f"Started process pool with {self.num_workers} processes")