Skip to content

Commit 0fac26a

Browse files
committed
Update evaluator.py
1 parent cb0a984 commit 0fac26a

File tree

1 file changed

+98
-21
lines changed

1 file changed

+98
-21
lines changed

openevolve/evaluator.py

Lines changed: 98 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -314,10 +314,6 @@ async def _cascade_evaluate(
314314
315315
Returns:
316316
Dictionary of metrics or EvaluationResult with metrics and artifacts
317-
318-
Raises:
319-
asyncio.TimeoutError: If any stage exceeds timeout
320-
Exception: If any evaluation stage raises an exception
321317
"""
322318
# Import the evaluation module to get cascade functions if they exist
323319
try:
@@ -339,12 +335,34 @@ async def _cascade_evaluate(
339335
return await self._direct_evaluate(program_path)
340336

341337
# Run first stage with timeout
342-
async def run_stage1():
343-
loop = asyncio.get_event_loop()
344-
return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
338+
try:
345339

346-
stage1_result = await asyncio.wait_for(run_stage1(), timeout=self.config.timeout)
347-
stage1_eval_result = self._process_evaluation_result(stage1_result)
340+
async def run_stage1():
341+
loop = asyncio.get_event_loop()
342+
return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
343+
344+
stage1_result = await asyncio.wait_for(run_stage1(), timeout=self.config.timeout)
345+
stage1_eval_result = self._process_evaluation_result(stage1_result)
346+
except asyncio.TimeoutError:
347+
logger.warning(f"Stage 1 evaluation timed out after {self.config.timeout}s")
348+
return EvaluationResult(
349+
metrics={"stage1_passed": 0.0, "error": 0.0, "timeout": True},
350+
artifacts={
351+
"failure_stage": "stage1",
352+
"timeout": True,
353+
},
354+
)
355+
except Exception as e:
356+
logger.error(f"Error in stage 1 evaluation: {str(e)}")
357+
# Capture stage 1 failure as artifacts
358+
return EvaluationResult(
359+
metrics={"stage1_passed": 0.0, "error": 0.0},
360+
artifacts={
361+
"stderr": str(e),
362+
"traceback": traceback.format_exc(),
363+
"failure_stage": "stage1",
364+
},
365+
)
348366

349367
# Check threshold
350368
if not self._passes_threshold(
@@ -357,12 +375,38 @@ async def run_stage1():
357375
return stage1_eval_result
358376

359377
# Run second stage with timeout
360-
async def run_stage2():
361-
loop = asyncio.get_event_loop()
362-
return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
378+
try:
363379

364-
stage2_result = await asyncio.wait_for(run_stage2(), timeout=self.config.timeout)
365-
stage2_eval_result = self._process_evaluation_result(stage2_result)
380+
async def run_stage2():
381+
loop = asyncio.get_event_loop()
382+
return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
383+
384+
stage2_result = await asyncio.wait_for(run_stage2(), timeout=self.config.timeout)
385+
stage2_eval_result = self._process_evaluation_result(stage2_result)
386+
except asyncio.TimeoutError:
387+
logger.warning(f"Stage 2 evaluation timed out after {self.config.timeout}s")
388+
# Capture stage 2 failure, but keep stage 1 results
389+
stage1_eval_result.artifacts.update(
390+
{
391+
"stage2_timeout": True,
392+
"failure_stage": "stage2",
393+
}
394+
)
395+
stage1_eval_result.metrics["stage2_passed"] = 0.0
396+
stage1_eval_result.metrics["timeout"] = True
397+
return stage1_eval_result
398+
except Exception as e:
399+
logger.error(f"Error in stage 2 evaluation: {str(e)}")
400+
# Capture stage 2 failure, but keep stage 1 results
401+
stage1_eval_result.artifacts.update(
402+
{
403+
"stage2_stderr": str(e),
404+
"stage2_traceback": traceback.format_exc(),
405+
"failure_stage": "stage2",
406+
}
407+
)
408+
stage1_eval_result.metrics["stage2_passed"] = 0.0
409+
return stage1_eval_result
366410

367411
# Merge results from stage 1 and 2
368412
merged_metrics = {}
@@ -393,12 +437,38 @@ async def run_stage2():
393437
return merged_result
394438

395439
# Run third stage with timeout
396-
async def run_stage3():
397-
loop = asyncio.get_event_loop()
398-
return await loop.run_in_executor(None, module.evaluate_stage3, program_path)
440+
try:
399441

400-
stage3_result = await asyncio.wait_for(run_stage3(), timeout=self.config.timeout)
401-
stage3_eval_result = self._process_evaluation_result(stage3_result)
442+
async def run_stage3():
443+
loop = asyncio.get_event_loop()
444+
return await loop.run_in_executor(None, module.evaluate_stage3, program_path)
445+
446+
stage3_result = await asyncio.wait_for(run_stage3(), timeout=self.config.timeout)
447+
stage3_eval_result = self._process_evaluation_result(stage3_result)
448+
except asyncio.TimeoutError:
449+
logger.warning(f"Stage 3 evaluation timed out after {self.config.timeout}s")
450+
# Capture stage 3 failure, but keep previous results
451+
merged_result.artifacts.update(
452+
{
453+
"stage3_timeout": True,
454+
"failure_stage": "stage3",
455+
}
456+
)
457+
merged_result.metrics["stage3_passed"] = 0.0
458+
merged_result.metrics["timeout"] = True
459+
return merged_result
460+
except Exception as e:
461+
logger.error(f"Error in stage 3 evaluation: {str(e)}")
462+
# Capture stage 3 failure, but keep previous results
463+
merged_result.artifacts.update(
464+
{
465+
"stage3_stderr": str(e),
466+
"stage3_traceback": traceback.format_exc(),
467+
"failure_stage": "stage3",
468+
}
469+
)
470+
merged_result.metrics["stage3_passed"] = 0.0
471+
return merged_result
402472

403473
# Merge stage 3 results
404474
for name, value in stage3_eval_result.metrics.items():
@@ -411,8 +481,15 @@ async def run_stage3():
411481

412482
except Exception as e:
413483
logger.error(f"Error in cascade evaluation: {str(e)}")
414-
# Re-raise the exception to allow retry handling at higher level
415-
raise
484+
# Return proper cascade failure result instead of re-raising
485+
return EvaluationResult(
486+
metrics={"stage1_passed": 0.0, "error": 0.0},
487+
artifacts={
488+
"stderr": str(e),
489+
"traceback": traceback.format_exc(),
490+
"failure_stage": "cascade_setup",
491+
},
492+
)
416493

417494
async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[str, float]:
418495
"""

0 commit comments

Comments
 (0)