Fix evaluator timeout handling and add test

codelion · codelion · commit a09758251c90 · 2025-06-19T15:44:29.000+08:00
diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py
@@ -242,10 +242,9 @@ def get_pending_artifacts(self, program_id: str) -> Optional[Dict[str, Union[str
         """
         return self._pending_artifacts.pop(program_id, None)
 
-    @run_in_executor
-    def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
+    async def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
         """
-        Directly evaluate a program using the evaluation function
+        Directly evaluate a program using the evaluation function with timeout
 
         Args:
             program_path: Path to the program file
@@ -254,8 +253,13 @@ def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
             Dictionary of metric name to score
         """
         try:
+            # Create a coroutine that runs the evaluation function in an executor
+            async def run_evaluation():
+                loop = asyncio.get_event_loop()
+                return await loop.run_in_executor(None, self.evaluate_function, program_path)
+
             # Run the evaluation with timeout
-            result = self.evaluate_function(program_path)
+            result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)
 
             # Validate result
             if not isinstance(result, dict):
@@ -264,6 +268,9 @@ def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
 
             return result
 
+        except asyncio.TimeoutError:
+            logger.warning(f"Evaluation timed out after {self.config.timeout}s")
+            return {"error": 0.0, "timeout": True}
         except Exception as e:
             logger.error(f"Error in direct evaluation: {str(e)}")
             return {"error": 0.0}
@@ -299,10 +306,24 @@ async def _cascade_evaluate(
             if not hasattr(module, "evaluate_stage1"):
                 return await self._direct_evaluate(program_path)
 
-            # Run first stage
+            # Run first stage with timeout
             try:
-                stage1_result = await run_in_executor(module.evaluate_stage1)(program_path)
+
+                async def run_stage1():
+                    loop = asyncio.get_event_loop()
+                    return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
+
+                stage1_result = await asyncio.wait_for(run_stage1(), timeout=self.config.timeout)
                 stage1_eval_result = self._process_evaluation_result(stage1_result)
+            except asyncio.TimeoutError:
+                logger.warning(f"Stage 1 evaluation timed out after {self.config.timeout}s")
+                return EvaluationResult(
+                    metrics={"stage1_passed": 0.0, "error": 0.0, "timeout": True},
+                    artifacts={
+                        "failure_stage": "stage1",
+                        "timeout": True,
+                    },
+                )
             except Exception as e:
                 logger.error(f"Error in stage 1 evaluation: {str(e)}")
                 # Capture stage 1 failure as artifacts
@@ -325,10 +346,27 @@ async def _cascade_evaluate(
             if not hasattr(module, "evaluate_stage2"):
                 return stage1_eval_result
 
-            # Run second stage
+            # Run second stage with timeout
             try:
-                stage2_result = await run_in_executor(module.evaluate_stage2)(program_path)
+
+                async def run_stage2():
+                    loop = asyncio.get_event_loop()
+                    return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
+
+                stage2_result = await asyncio.wait_for(run_stage2(), timeout=self.config.timeout)
                 stage2_eval_result = self._process_evaluation_result(stage2_result)
+            except asyncio.TimeoutError:
+                logger.warning(f"Stage 2 evaluation timed out after {self.config.timeout}s")
+                # Capture stage 2 failure, but keep stage 1 results
+                stage1_eval_result.artifacts.update(
+                    {
+                        "stage2_timeout": True,
+                        "failure_stage": "stage2",
+                    }
+                )
+                stage1_eval_result.metrics["stage2_passed"] = 0.0
+                stage1_eval_result.metrics["timeout"] = True
+                return stage1_eval_result
             except Exception as e:
                 logger.error(f"Error in stage 2 evaluation: {str(e)}")
                 # Capture stage 2 failure, but keep stage 1 results
@@ -370,10 +408,27 @@ async def _cascade_evaluate(
             if not hasattr(module, "evaluate_stage3"):
                 return merged_result
 
-            # Run third stage
+            # Run third stage with timeout
             try:
-                stage3_result = await run_in_executor(module.evaluate_stage3)(program_path)
+
+                async def run_stage3():
+                    loop = asyncio.get_event_loop()
+                    return await loop.run_in_executor(None, module.evaluate_stage3, program_path)
+
+                stage3_result = await asyncio.wait_for(run_stage3(), timeout=self.config.timeout)
                 stage3_eval_result = self._process_evaluation_result(stage3_result)
+            except asyncio.TimeoutError:
+                logger.warning(f"Stage 3 evaluation timed out after {self.config.timeout}s")
+                # Capture stage 3 failure, but keep previous results
+                merged_result.artifacts.update(
+                    {
+                        "stage3_timeout": True,
+                        "failure_stage": "stage3",
+                    }
+                )
+                merged_result.metrics["stage3_passed"] = 0.0
+                merged_result.metrics["timeout"] = True
+                return merged_result
             except Exception as e:
                 logger.error(f"Error in stage 3 evaluation: {str(e)}")
                 # Capture stage 3 failure, but keep previous results
diff --git a/openevolve/utils/async_utils.py b/openevolve/utils/async_utils.py
@@ -32,6 +32,60 @@ async def wrapper(*args: Any, **kwargs: Any) -> Any:
     return wrapper
 
 
+async def run_with_timeout(
+    coro: Callable, timeout: float, *args: Any, timeout_error_value: Any = None, **kwargs: Any
+) -> Any:
+    """
+    Call a coroutine function and await it with a timeout, returning a fallback value on timeout
+
+    Args:
+        coro: Coroutine function (not a coroutine object); invoked as coro(*args, **kwargs)
+        timeout: Timeout in seconds
+        *args: Arguments to pass to the coroutine
+        timeout_error_value: Value returned on timeout; None means {"error": 0.0, "timeout": True}
+        **kwargs: Keyword arguments to pass to the coroutine
+
+    Returns:
+        Result of the coroutine, or timeout_error_value if asyncio.TimeoutError is raised
+    """
+    if timeout_error_value is None:
+        timeout_error_value = {"error": 0.0, "timeout": True}
+
+    try:
+        return await asyncio.wait_for(coro(*args, **kwargs), timeout=timeout)
+    except asyncio.TimeoutError:
+        logger.warning(f"Operation timed out after {timeout}s")
+        return timeout_error_value
+
+
+async def run_sync_with_timeout(
+    func: Callable, timeout: float, *args: Any, timeout_error_value: Any = None, **kwargs: Any
+) -> Any:
+    """
+    Run a synchronous function in the event loop's default executor with a timeout
+
+    Args:
+        func: Synchronous function to run; invoked as func(*args, **kwargs) via functools.partial
+        timeout: Timeout in seconds (NOTE(review): func may outlive the timeout - confirm)
+        *args: Arguments to pass to the function
+        timeout_error_value: Value returned on timeout; None means {"error": 0.0, "timeout": True}
+        **kwargs: Keyword arguments to pass to the function
+
+    Returns:
+        Result of the function, or timeout_error_value if asyncio.TimeoutError is raised
+    """
+    if timeout_error_value is None:
+        timeout_error_value = {"error": 0.0, "timeout": True}
+
+    try:
+        loop = asyncio.get_event_loop()
+        task = loop.run_in_executor(None, functools.partial(func, *args, **kwargs))
+        return await asyncio.wait_for(task, timeout=timeout)
+    except asyncio.TimeoutError:
+        logger.warning(f"Sync operation timed out after {timeout}s")
+        return timeout_error_value
+
+
 async def gather_with_concurrency(
     n: int, *tasks: asyncio.Future, return_exceptions: bool = False
 ) -> List[Any]:
diff --git a/tests/test_evaluator_timeout.py b/tests/test_evaluator_timeout.py