@@ -117,12 +117,21 @@ async def evaluate_program(
         # Retry logic for evaluation
         last_exception = None
         for attempt in range(self.config.max_retries + 1):
-            # Create a temporary file for the program
-            with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
-                temp_file.write(program_code.encode("utf-8"))
-                temp_file_path = temp_file.name
-
+            # Create a temporary file for the program - FIXED: proper file handling
+            temp_file_path = None
             try:
+                # Create temp file and write content with proper flushing
+                temp_fd, temp_file_path = tempfile.mkstemp(suffix=".py", text=True)
+                with os.fdopen(temp_fd, 'w') as temp_file:
+                    temp_file.write(program_code)
+                    temp_file.flush()  # Ensure content is written to disk
+                    os.fsync(temp_file.fileno())  # Force sync to disk
+
+                # Verify file was written correctly (debug)
+                with open(temp_file_path, 'r') as verify_file:
+                    written_content = verify_file.read()
+                    logger.debug(f"Temp file content (first 100 chars): {written_content[:100]}")
+
                 # Run evaluation
                 if self.config.cascade_evaluation:
                     # Run cascade evaluation
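The core of this hunk is swapping `NamedTemporaryFile` for `tempfile.mkstemp` with an explicit `flush`/`fsync`, so the program source is guaranteed to be on disk before anything else reads the path. A minimal standalone sketch of the same pattern (the function name and structure here are illustrative, not taken from the patch):

import os
import tempfile

def write_temp_program(program_code: str) -> str:
    """Write source code to a temp .py file and make sure it reaches disk."""
    fd, path = tempfile.mkstemp(suffix=".py", text=True)
    with os.fdopen(fd, "w") as f:
        f.write(program_code)
        f.flush()             # flush Python's userspace buffer
        os.fsync(f.fileno())  # ask the OS to sync the file to disk
    return path  # caller is responsible for os.unlink(path)

The fsync matters because the file is consumed out-of-band (by an executor thread or subprocess) rather than through the same file object, so buffered-but-unwritten content would otherwise be a race.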
@@ -186,28 +195,49 @@ async def evaluate_program(
                 )
                 traceback.print_exc()
 
-                # Capture failure artifacts if enabled
+                # Capture failure artifacts if enabled - FIXED: better artifact capture
                 if artifacts_enabled and program_id:
-                    self._pending_artifacts[program_id] = {
+                    failure_artifacts = {
                         "stderr": str(e),
                         "traceback": traceback.format_exc(),
                         "failure_stage": "evaluation",
+                        "attempt": attempt + 1,
+                        "timeout_config": self.config.timeout,
                     }
+
+                    # Check if this was a timeout error
+                    if isinstance(e, asyncio.TimeoutError) or "timeout" in str(e).lower():
+                        failure_artifacts["timeout"] = True
+                        failure_artifacts["failure_stage"] = "timeout"
+
+                    # Store or update artifacts
+                    if program_id in self._pending_artifacts:
+                        self._pending_artifacts[program_id].update(failure_artifacts)
+                    else:
+                        self._pending_artifacts[program_id] = failure_artifacts
 
                 # If this is not the last attempt, wait a bit before retrying
                 if attempt < self.config.max_retries:
                     await asyncio.sleep(1.0)  # Wait 1 second before retry
 
             finally:
                 # Clean up temporary file
-                if os.path.exists(temp_file_path):
-                    os.unlink(temp_file_path)
+                if temp_file_path and os.path.exists(temp_file_path):
+                    try:
+                        os.unlink(temp_file_path)
+                    except OSError:
+                        pass  # Ignore cleanup errors
 
-        # All retries failed
+        # All retries failed - FIXED: better error return with timeout info
         logger.error(
             f"All evaluation attempts failed for program{program_id_str}. Last error: {str(last_exception)}"
         )
-        return {"error": 0.0}
+
+        # Check if the last exception was a timeout
+        if isinstance(last_exception, asyncio.TimeoutError):
+            return {"error": 0.0, "timeout": True}
+        else:
+            return {"error": 0.0}
 
     def _process_evaluation_result(self, result: Any) -> EvaluationResult:
         """
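The artifact handling above now merges new failure data into whatever is already pending for the program instead of overwriting it, and flags timeouts explicitly. A reduced sketch of that merge logic with hypothetical module-level names (the patch itself keeps this state on the evaluator instance):

import asyncio
from typing import Any, Dict

pending_artifacts: Dict[str, Dict[str, Any]] = {}

def record_failure(program_id: str, exc: Exception, attempt: int, timeout_s: float) -> None:
    """Merge failure details into any artifacts already captured for this program."""
    artifacts: Dict[str, Any] = {
        "stderr": str(exc),
        "failure_stage": "evaluation",
        "attempt": attempt + 1,
        "timeout_config": timeout_s,
    }
    # Timeouts are flagged explicitly so downstream consumers can tell them apart
    if isinstance(exc, asyncio.TimeoutError) or "timeout" in str(exc).lower():
        artifacts["timeout"] = True
        artifacts["failure_stage"] = "timeout"
    # update() preserves artifacts captured by earlier attempts or stages
    pending_artifacts.setdefault(program_id, {}).update(artifacts)

Note that `setdefault(...).update(...)` is equivalent to the patch's explicit if/else branch; the merge semantics are what keep artifacts from earlier retry attempts from being silently dropped.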
@@ -252,27 +282,35 @@ async def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
         Returns:
             Dictionary of metric name to score
         """
+        logger.debug(f"Starting direct evaluation with timeout={self.config.timeout}s")
+
         try:
             # Create a coroutine that runs the evaluation function in an executor
             async def run_evaluation():
                 loop = asyncio.get_event_loop()
-                return await loop.run_in_executor(None, self.evaluate_function, program_path)
+                logger.debug(f"Running evaluation function on {program_path}")
+                result = await loop.run_in_executor(None, self.evaluate_function, program_path)
+                logger.debug(f"Evaluation function returned: {result}")
+                return result
 
             # Run the evaluation with timeout
+            logger.debug(f"Waiting for evaluation with {self.config.timeout}s timeout")
             result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)
 
             # Validate result
             if not isinstance(result, dict):
                 logger.warning(f"Evaluation returned non-dictionary result: {result}")
                 return {"error": 0.0}
 
+            logger.debug(f"Evaluation completed successfully: {result}")
             return result
 
         except asyncio.TimeoutError:
             logger.warning(f"Evaluation timed out after {self.config.timeout}s")
             return {"error": 0.0, "timeout": True}
         except Exception as e:
             logger.error(f"Error in direct evaluation: {str(e)}")
+            traceback.print_exc()
             return {"error": 0.0}
 
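The timeout machinery in `_direct_evaluate` runs the synchronous evaluation function in an executor so that `asyncio.wait_for` can bound it. A self-contained sketch of that pattern, with a stand-in evaluator in place of the real `evaluate_function`:

import asyncio
import time
from typing import Dict

def slow_evaluate(path: str) -> Dict[str, float]:
    time.sleep(2)  # stand-in for real evaluation work
    return {"score": 1.0}

async def evaluate_with_timeout(path: str, timeout_s: float) -> Dict[str, float]:
    loop = asyncio.get_running_loop()
    try:
        # run_in_executor keeps the event loop free; wait_for enforces the deadline
        return await asyncio.wait_for(
            loop.run_in_executor(None, slow_evaluate, path), timeout=timeout_s
        )
    except asyncio.TimeoutError:
        return {"error": 0.0, "timeout": True}

print(asyncio.run(evaluate_with_timeout("program.py", timeout_s=0.5)))
# -> {'error': 0.0, 'timeout': True}

One caveat of this approach: `wait_for` cancels the awaiting coroutine, but the executor thread itself is not killed and will run to completion in the background, which is presumably why the patch relies on temp-file cleanup in `finally` rather than on the evaluation stopping promptly.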
278316 async def _cascade_evaluate (
@@ -308,7 +346,6 @@ async def _cascade_evaluate(
 
         # Run first stage with timeout
         try:
-
             async def run_stage1():
                 loop = asyncio.get_event_loop()
                 return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
@@ -348,7 +385,6 @@ async def run_stage1():
 
         # Run second stage with timeout
         try:
-
             async def run_stage2():
                 loop = asyncio.get_event_loop()
                 return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
@@ -410,7 +446,6 @@ async def run_stage2():
 
         # Run third stage with timeout
         try:
-
             async def run_stage3():
                 loop = asyncio.get_event_loop()
                 return await loop.run_in_executor(None, module.evaluate_stage3, program_path)
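These last three hunks only remove stray blank lines, but they show the shape of cascade evaluation: each `evaluate_stageN` is loaded from the user's evaluator module and awaited with the same executor-plus-timeout wrapper as the direct path. A compressed sketch of that cascade; the early-exit threshold check is an assumption here, since the gating logic between stages is not shown in this excerpt:

import asyncio
from typing import Callable, Dict, List

async def run_stage(
    fn: Callable[[str], Dict[str, float]], path: str, timeout_s: float
) -> Dict[str, float]:
    loop = asyncio.get_running_loop()
    return await asyncio.wait_for(loop.run_in_executor(None, fn, path), timeout=timeout_s)

async def cascade(
    stages: List[Callable[[str], Dict[str, float]]],
    path: str,
    timeout_s: float,
    threshold: float,
) -> Dict[str, float]:
    merged: Dict[str, float] = {}
    for fn in stages:
        result = await run_stage(fn, path, timeout_s)
        merged.update(result)
        # Assumed gating: stop early if this stage scores below the threshold
        if min(result.values(), default=0.0) < threshold:
            break
    return merged

The point of the cascade is to spend the full timeout budget only on programs that survive the cheap early stages.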