@@ -347,6 +347,13 @@ def extract_path(u):
347347 failed_steps .append (i + 1 )
348348 logging .warning (f"Step { i + 1 } detected as failed based on output" )
349349
350+ # Check for critical failures that should immediately stop execution
351+ if _is_critical_failure_step (tool_output , instruction_to_execute ):
352+ failed_steps .append (i + 1 )
353+ final_summary = f"FINAL_SUMMARY: Critical failure at step { i + 1 } : '{ instruction_to_execute } '. Error details: { tool_output [:200 ]} ..."
354+ logging .error (f"Critical failure detected at step { i + 1 } , aborting remaining steps to save time" )
355+ break
356+
350357 # Check for max iterations, which indicates a failure to complete the step.
351358 if "Agent stopped due to max iterations." in tool_output :
352359 failed_steps .append (i + 1 )
@@ -479,10 +486,17 @@ def extract_path(u):
479486
480487 logging .debug (f"Test case '{ case_name } ' final status: { status } (success indicators: { has_success } , failure indicators: { has_failure } )" )
481488
489+ # Classify failure type if the test case failed
490+ failure_type = None
491+ if status == "failed" :
492+ failure_type = _classify_failure_type (final_summary , failed_steps )
493+ logging .info (f"Test case '{ case_name } ' failed with type: { failure_type } " )
494+
482495 case_result = {
483496 "case_name" : case_name ,
484497 "final_summary" : final_summary ,
485498 "status" : status ,
499+ "failure_type" : failure_type ,
486500 }
487501
488502 logging .debug (f"=== Agent Worker Completed for { case_name } . ===" )
@@ -491,6 +505,112 @@ def extract_path(u):
491505 return {"case_result" : case_result }
492506
493507
def _is_critical_failure_step(tool_output: str, step_instruction: str = "") -> bool:
    """Check whether a single step's output signals a critical failure.

    The check is a case-insensitive substring search of ``tool_output``
    against a fixed set of failure phrases; the first phrase found (in the
    order listed below) is the one reported in the debug log.

    Args:
        tool_output: The output from the step execution. ``None`` or an
            empty string is treated as "no critical failure".
        step_instruction: The instruction that was executed. It does not
            influence the result; it is only included in the debug log so a
            match can be traced back to the step that produced it.

    Returns:
        bool: True if any critical failure phrase occurs in the output,
        False otherwise.
    """
    if not tool_output:
        return False

    # Phrases treated as grounds for stopping execution immediately.
    # NOTE: this is plain substring matching, so an output that merely quotes
    # one of these phrases (e.g. while verifying an error message) also matches.
    critical_step_patterns = (
        "element not found",
        "cannot find",
        "page crashed",
        "permission denied",
        "access denied",
        "network timeout",
        "browser error",
        "navigation failed",
        "session expired",
        "server error",
        "connection timeout",
        "unable to load",
        "page not accessible",
        "critical error",
    )

    output_lower = tool_output.lower()
    matched = next(
        (pattern for pattern in critical_step_patterns if pattern in output_lower),
        None,
    )
    if matched is None:
        return False

    # Lazy %-formatting: the message is only built when DEBUG is enabled.
    logging.debug(
        "Critical failure detected in step: pattern '%s' found (instruction: %r)",
        matched,
        step_instruction,
    )
    return True
548+
549+
def _classify_failure_type(
    final_summary: str,
    failed_steps: list = None,
    *,
    critical_failed_step_threshold: int = 3,
) -> str:
    """Classify a failure as 'critical' or 'recoverable'.

    Classification proceeds in three stages, returning at the first hit:

    1. The summary contains the marker phrase "critical failure at step".
    2. The summary contains any of the known critical failure phrases
       (case-insensitive substring match).
    3. The number of failed steps reaches ``critical_failed_step_threshold``.

    Stage 3 is evaluated even when the summary is empty or ``None``, so a run
    with many failed steps is classified the same way whether or not any
    summary text was produced.

    Args:
        final_summary: The final summary text containing failure information.
            May be empty or ``None``, in which case only the failed-step
            count is considered.
        failed_steps: List of failed step numbers, or ``None``.
        critical_failed_step_threshold: Minimum number of failed steps that
            is treated as a critical failure on its own. Defaults to 3.

    Returns:
        str: 'critical' for unrecoverable failures, 'recoverable' for
        failures that might be fixed via replan.
    """
    summary_lower = (final_summary or "").lower()

    if summary_lower:
        # Marker phrase indicating execution was already aborted on a
        # critical step.
        if "critical failure at step" in summary_lower:
            logging.debug("Early critical failure exit detected - classified as critical")
            return "critical"

        # Phrases that indicate unrecoverable issues. "max iterations" also
        # covers the longer "agent stopped due to max iterations" message.
        critical_patterns = (
            "element not found",
            "cannot find",
            "page crashed",
            "permission denied",
            "access denied",
            "network timeout",
            "max iterations",
            "exception:",
            "cannot proceed",
            "preamble action",
            "raised exception",
            "element not available",
            "page not accessible",
            "browser error",
            "navigation failed",
            "session expired",
            "server error",
            "connection timeout",
            "unable to load",
            "critical error",
        )
        matched = next(
            (pattern for pattern in critical_patterns if pattern in summary_lower),
            None,
        )
        if matched is not None:
            logging.debug(
                "Critical failure detected: pattern '%s' found in summary", matched
            )
            return "critical"

    # Many failed steps suggest a fundamental issue rather than an isolated
    # one. The truthiness guard keeps "no failed steps" recoverable even if a
    # caller passes a threshold of zero.
    if failed_steps and len(failed_steps) >= critical_failed_step_threshold:
        logging.debug("Critical failure detected: %d steps failed", len(failed_steps))
        return "critical"

    # Default to recoverable for validation failures, partial failures, etc.
    logging.debug("Failure classified as recoverable")
    return "recoverable"
612+
613+
494614def _is_navigation_instruction (instruction : str ) -> bool :
495615 """Determine if the instruction is a navigation instruction.
496616
0 commit comments