Skip to content

Commit b483af9

Browse files
committed
fxies
1 parent a097582 commit b483af9

File tree

3 files changed

+399
-304
lines changed

3 files changed

+399
-304
lines changed

openevolve/evaluator.py

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,21 @@ async def evaluate_program(
117117
# Retry logic for evaluation
118118
last_exception = None
119119
for attempt in range(self.config.max_retries + 1):
120-
# Create a temporary file for the program
121-
with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
122-
temp_file.write(program_code.encode("utf-8"))
123-
temp_file_path = temp_file.name
124-
120+
# Create a temporary file for the program - FIXED: proper file handling
121+
temp_file_path = None
125122
try:
123+
# Create temp file and write content with proper flushing
124+
temp_fd, temp_file_path = tempfile.mkstemp(suffix=".py", text=True)
125+
with os.fdopen(temp_fd, 'w') as temp_file:
126+
temp_file.write(program_code)
127+
temp_file.flush() # Ensure content is written to disk
128+
os.fsync(temp_file.fileno()) # Force sync to disk
129+
130+
# Verify file was written correctly (debug)
131+
with open(temp_file_path, 'r') as verify_file:
132+
written_content = verify_file.read()
133+
logger.debug(f"Temp file content (first 100 chars): {written_content[:100]}")
134+
126135
# Run evaluation
127136
if self.config.cascade_evaluation:
128137
# Run cascade evaluation
@@ -186,28 +195,49 @@ async def evaluate_program(
186195
)
187196
traceback.print_exc()
188197

189-
# Capture failure artifacts if enabled
198+
# Capture failure artifacts if enabled - FIXED: better artifact capture
190199
if artifacts_enabled and program_id:
191-
self._pending_artifacts[program_id] = {
200+
failure_artifacts = {
192201
"stderr": str(e),
193202
"traceback": traceback.format_exc(),
194203
"failure_stage": "evaluation",
204+
"attempt": attempt + 1,
205+
"timeout_config": self.config.timeout,
195206
}
207+
208+
# Check if this was a timeout error
209+
if isinstance(e, asyncio.TimeoutError) or "timeout" in str(e).lower():
210+
failure_artifacts["timeout"] = True
211+
failure_artifacts["failure_stage"] = "timeout"
212+
213+
# Store or update artifacts
214+
if program_id in self._pending_artifacts:
215+
self._pending_artifacts[program_id].update(failure_artifacts)
216+
else:
217+
self._pending_artifacts[program_id] = failure_artifacts
196218

197219
# If this is not the last attempt, wait a bit before retrying
198220
if attempt < self.config.max_retries:
199221
await asyncio.sleep(1.0) # Wait 1 second before retry
200222

201223
finally:
202224
# Clean up temporary file
203-
if os.path.exists(temp_file_path):
204-
os.unlink(temp_file_path)
225+
if temp_file_path and os.path.exists(temp_file_path):
226+
try:
227+
os.unlink(temp_file_path)
228+
except OSError:
229+
pass # Ignore cleanup errors
205230

206-
# All retries failed
231+
# All retries failed - FIXED: better error return with timeout info
207232
logger.error(
208233
f"All evaluation attempts failed for program{program_id_str}. Last error: {str(last_exception)}"
209234
)
210-
return {"error": 0.0}
235+
236+
# Check if the last exception was a timeout
237+
if isinstance(last_exception, asyncio.TimeoutError):
238+
return {"error": 0.0, "timeout": True}
239+
else:
240+
return {"error": 0.0}
211241

212242
def _process_evaluation_result(self, result: Any) -> EvaluationResult:
213243
"""
@@ -252,27 +282,35 @@ async def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
252282
Returns:
253283
Dictionary of metric name to score
254284
"""
285+
logger.debug(f"Starting direct evaluation with timeout={self.config.timeout}s")
286+
255287
try:
256288
# Create a coroutine that runs the evaluation function in an executor
257289
async def run_evaluation():
258290
loop = asyncio.get_event_loop()
259-
return await loop.run_in_executor(None, self.evaluate_function, program_path)
291+
logger.debug(f"Running evaluation function on {program_path}")
292+
result = await loop.run_in_executor(None, self.evaluate_function, program_path)
293+
logger.debug(f"Evaluation function returned: {result}")
294+
return result
260295

261296
# Run the evaluation with timeout
297+
logger.debug(f"Waiting for evaluation with {self.config.timeout}s timeout")
262298
result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)
263299

264300
# Validate result
265301
if not isinstance(result, dict):
266302
logger.warning(f"Evaluation returned non-dictionary result: {result}")
267303
return {"error": 0.0}
268304

305+
logger.debug(f"Evaluation completed successfully: {result}")
269306
return result
270307

271308
except asyncio.TimeoutError:
272309
logger.warning(f"Evaluation timed out after {self.config.timeout}s")
273310
return {"error": 0.0, "timeout": True}
274311
except Exception as e:
275312
logger.error(f"Error in direct evaluation: {str(e)}")
313+
traceback.print_exc()
276314
return {"error": 0.0}
277315

278316
async def _cascade_evaluate(
@@ -308,7 +346,6 @@ async def _cascade_evaluate(
308346

309347
# Run first stage with timeout
310348
try:
311-
312349
async def run_stage1():
313350
loop = asyncio.get_event_loop()
314351
return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
@@ -348,7 +385,6 @@ async def run_stage1():
348385

349386
# Run second stage with timeout
350387
try:
351-
352388
async def run_stage2():
353389
loop = asyncio.get_event_loop()
354390
return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
@@ -410,7 +446,6 @@ async def run_stage2():
410446

411447
# Run third stage with timeout
412448
try:
413-
414449
async def run_stage3():
415450
loop = asyncio.get_event_loop()
416451
return await loop.run_in_executor(None, module.evaluate_stage3, program_path)

openevolve/utils/async_utils.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,24 +33,28 @@ async def wrapper(*args: Any, **kwargs: Any) -> Any:
3333

3434

3535
async def run_with_timeout(
36-
coro: Callable, timeout: float, *args: Any, timeout_error_value: Any = None, **kwargs: Any
36+
coro: Callable,
37+
timeout: float,
38+
*args: Any,
39+
timeout_error_value: Any = None,
40+
**kwargs: Any
3741
) -> Any:
3842
"""
3943
Run a coroutine with a timeout, returning a default value on timeout
40-
44+
4145
Args:
4246
coro: Coroutine function to run
4347
timeout: Timeout in seconds
4448
*args: Arguments to pass to the coroutine
4549
timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
4650
**kwargs: Keyword arguments to pass to the coroutine
47-
51+
4852
Returns:
4953
Result of the coroutine or timeout_error_value on timeout
5054
"""
5155
if timeout_error_value is None:
5256
timeout_error_value = {"error": 0.0, "timeout": True}
53-
57+
5458
try:
5559
return await asyncio.wait_for(coro(*args, **kwargs), timeout=timeout)
5660
except asyncio.TimeoutError:
@@ -59,24 +63,28 @@ async def run_with_timeout(
5963

6064

6165
async def run_sync_with_timeout(
62-
func: Callable, timeout: float, *args: Any, timeout_error_value: Any = None, **kwargs: Any
66+
func: Callable,
67+
timeout: float,
68+
*args: Any,
69+
timeout_error_value: Any = None,
70+
**kwargs: Any
6371
) -> Any:
6472
"""
6573
Run a synchronous function in an executor with a timeout
66-
74+
6775
Args:
6876
func: Synchronous function to run
6977
timeout: Timeout in seconds
7078
*args: Arguments to pass to the function
7179
timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
7280
**kwargs: Keyword arguments to pass to the function
73-
81+
7482
Returns:
7583
Result of the function or timeout_error_value on timeout
7684
"""
7785
if timeout_error_value is None:
7886
timeout_error_value = {"error": 0.0, "timeout": True}
79-
87+
8088
try:
8189
loop = asyncio.get_event_loop()
8290
task = loop.run_in_executor(None, functools.partial(func, *args, **kwargs))
@@ -170,9 +178,21 @@ class TaskPool:
170178
"""
171179

172180
def __init__(self, max_concurrency: int = 10):
173-
self.semaphore = asyncio.Semaphore(max_concurrency)
181+
self.max_concurrency = max_concurrency
182+
self._semaphore = None # Lazy initialization
174183
self.tasks: List[asyncio.Task] = []
175184

185+
@property
186+
def semaphore(self):
187+
"""Lazy-initialized semaphore that creates itself when first accessed"""
188+
if self._semaphore is None:
189+
try:
190+
self._semaphore = asyncio.Semaphore(self.max_concurrency)
191+
except RuntimeError:
192+
# No event loop running, will be created later when needed
193+
pass
194+
return self._semaphore
195+
176196
async def run(self, coro: Callable, *args: Any, **kwargs: Any) -> Any:
177197
"""
178198
Run a coroutine in the pool
@@ -185,7 +205,11 @@ async def run(self, coro: Callable, *args: Any, **kwargs: Any) -> Any:
185205
Returns:
186206
Result of the coroutine
187207
"""
188-
async with self.semaphore:
208+
# Ensure semaphore is created in the current event loop
209+
if self._semaphore is None:
210+
self._semaphore = asyncio.Semaphore(self.max_concurrency)
211+
212+
async with self._semaphore:
189213
return await coro(*args, **kwargs)
190214

191215
def create_task(self, coro: Callable, *args: Any, **kwargs: Any) -> asyncio.Task:

0 commit comments

Comments
 (0)