@@ -314,10 +314,6 @@ async def _cascade_evaluate(
314314
315315 Returns:
316316 Dictionary of metrics or EvaluationResult with metrics and artifacts
317-
318- Raises:
319- asyncio.TimeoutError: If any stage exceeds timeout
320- Exception: If any evaluation stage raises an exception
321317 """
322318 # Import the evaluation module to get cascade functions if they exist
323319 try :
@@ -339,12 +335,34 @@ async def _cascade_evaluate(
339335 return await self ._direct_evaluate (program_path )
340336
341337 # Run first stage with timeout
342- async def run_stage1 ():
343- loop = asyncio .get_event_loop ()
344- return await loop .run_in_executor (None , module .evaluate_stage1 , program_path )
338+ try :
345339
346- stage1_result = await asyncio .wait_for (run_stage1 (), timeout = self .config .timeout )
347- stage1_eval_result = self ._process_evaluation_result (stage1_result )
async def run_stage1():
    """Run ``module.evaluate_stage1(program_path)`` in the default executor.

    ``module`` and ``program_path`` are taken from the enclosing scope.
    The stage function is invoked as a plain callable via
    ``run_in_executor`` so the coroutine can be awaited (and wrapped in
    ``asyncio.wait_for`` by the caller).

    Returns:
        Whatever ``module.evaluate_stage1(program_path)`` returns.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented preferred accessor and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, module.evaluate_stage1, program_path)
343+
344+ stage1_result = await asyncio .wait_for (run_stage1 (), timeout = self .config .timeout )
345+ stage1_eval_result = self ._process_evaluation_result (stage1_result )
346+ except asyncio .TimeoutError :
347+ logger .warning (f"Stage 1 evaluation timed out after { self .config .timeout } s" )
348+ return EvaluationResult (
349+ metrics = {"stage1_passed" : 0.0 , "error" : 0.0 , "timeout" : True },
350+ artifacts = {
351+ "failure_stage" : "stage1" ,
352+ "timeout" : True ,
353+ },
354+ )
355+ except Exception as e :
356+ logger .error (f"Error in stage 1 evaluation: { str (e )} " )
357+ # Capture stage 1 failure as artifacts
358+ return EvaluationResult (
359+ metrics = {"stage1_passed" : 0.0 , "error" : 0.0 },
360+ artifacts = {
361+ "stderr" : str (e ),
362+ "traceback" : traceback .format_exc (),
363+ "failure_stage" : "stage1" ,
364+ },
365+ )
348366
349367 # Check threshold
350368 if not self ._passes_threshold (
@@ -357,12 +375,38 @@ async def run_stage1():
357375 return stage1_eval_result
358376
359377 # Run second stage with timeout
360- async def run_stage2 ():
361- loop = asyncio .get_event_loop ()
362- return await loop .run_in_executor (None , module .evaluate_stage2 , program_path )
378+ try :
363379
364- stage2_result = await asyncio .wait_for (run_stage2 (), timeout = self .config .timeout )
365- stage2_eval_result = self ._process_evaluation_result (stage2_result )
async def run_stage2():
    """Run ``module.evaluate_stage2(program_path)`` in the default executor.

    ``module`` and ``program_path`` are taken from the enclosing scope.
    The stage function is invoked as a plain callable via
    ``run_in_executor`` so the coroutine can be awaited (and wrapped in
    ``asyncio.wait_for`` by the caller).

    Returns:
        Whatever ``module.evaluate_stage2(program_path)`` returns.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented preferred accessor and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, module.evaluate_stage2, program_path)
383+
384+ stage2_result = await asyncio .wait_for (run_stage2 (), timeout = self .config .timeout )
385+ stage2_eval_result = self ._process_evaluation_result (stage2_result )
386+ except asyncio .TimeoutError :
387+ logger .warning (f"Stage 2 evaluation timed out after { self .config .timeout } s" )
388+ # Capture stage 2 failure, but keep stage 1 results
389+ stage1_eval_result .artifacts .update (
390+ {
391+ "stage2_timeout" : True ,
392+ "failure_stage" : "stage2" ,
393+ }
394+ )
395+ stage1_eval_result .metrics ["stage2_passed" ] = 0.0
396+ stage1_eval_result .metrics ["timeout" ] = True
397+ return stage1_eval_result
398+ except Exception as e :
399+ logger .error (f"Error in stage 2 evaluation: { str (e )} " )
400+ # Capture stage 2 failure, but keep stage 1 results
401+ stage1_eval_result .artifacts .update (
402+ {
403+ "stage2_stderr" : str (e ),
404+ "stage2_traceback" : traceback .format_exc (),
405+ "failure_stage" : "stage2" ,
406+ }
407+ )
408+ stage1_eval_result .metrics ["stage2_passed" ] = 0.0
409+ return stage1_eval_result
366410
367411 # Merge results from stage 1 and 2
368412 merged_metrics = {}
@@ -393,12 +437,38 @@ async def run_stage2():
393437 return merged_result
394438
395439 # Run third stage with timeout
396- async def run_stage3 ():
397- loop = asyncio .get_event_loop ()
398- return await loop .run_in_executor (None , module .evaluate_stage3 , program_path )
440+ try :
399441
400- stage3_result = await asyncio .wait_for (run_stage3 (), timeout = self .config .timeout )
401- stage3_eval_result = self ._process_evaluation_result (stage3_result )
async def run_stage3():
    """Run ``module.evaluate_stage3(program_path)`` in the default executor.

    ``module`` and ``program_path`` are taken from the enclosing scope.
    The stage function is invoked as a plain callable via
    ``run_in_executor`` so the coroutine can be awaited (and wrapped in
    ``asyncio.wait_for`` by the caller).

    Returns:
        Whatever ``module.evaluate_stage3(program_path)`` returns.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented preferred accessor and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, module.evaluate_stage3, program_path)
445+
446+ stage3_result = await asyncio .wait_for (run_stage3 (), timeout = self .config .timeout )
447+ stage3_eval_result = self ._process_evaluation_result (stage3_result )
448+ except asyncio .TimeoutError :
449+ logger .warning (f"Stage 3 evaluation timed out after { self .config .timeout } s" )
450+ # Capture stage 3 failure, but keep previous results
451+ merged_result .artifacts .update (
452+ {
453+ "stage3_timeout" : True ,
454+ "failure_stage" : "stage3" ,
455+ }
456+ )
457+ merged_result .metrics ["stage3_passed" ] = 0.0
458+ merged_result .metrics ["timeout" ] = True
459+ return merged_result
460+ except Exception as e :
461+ logger .error (f"Error in stage 3 evaluation: { str (e )} " )
462+ # Capture stage 3 failure, but keep previous results
463+ merged_result .artifacts .update (
464+ {
465+ "stage3_stderr" : str (e ),
466+ "stage3_traceback" : traceback .format_exc (),
467+ "failure_stage" : "stage3" ,
468+ }
469+ )
470+ merged_result .metrics ["stage3_passed" ] = 0.0
471+ return merged_result
402472
403473 # Merge stage 3 results
404474 for name , value in stage3_eval_result .metrics .items ():
@@ -411,8 +481,15 @@ async def run_stage3():
411481
412482 except Exception as e :
413483 logger .error (f"Error in cascade evaluation: { str (e )} " )
414- # Re-raise the exception to allow retry handling at higher level
415- raise
484+ # Return proper cascade failure result instead of re-raising
485+ return EvaluationResult (
486+ metrics = {"stage1_passed" : 0.0 , "error" : 0.0 },
487+ artifacts = {
488+ "stderr" : str (e ),
489+ "traceback" : traceback .format_exc (),
490+ "failure_stage" : "cascade_setup" ,
491+ },
492+ )
416493
417494 async def _llm_evaluate (self , program_code : str , program_id : str = "" ) -> Dict [str , float ]:
418495 """
0 commit comments