Commit 071076c

feat: checkpoint2
1 parent df9a857 commit 071076c

4 files changed: +563 -236 lines changed

webqa_agent/testers/case_gen/agents/execute_agent.py

Lines changed: 47 additions & 6 deletions
@@ -279,11 +279,12 @@ async def generate_dynamic_steps_with_llm(
 reason = result.get("reason", "No reason provided")
 steps = result.get("steps", [])

-# Extract and validate analysis fields (QAG format)
+# Extract and validate analysis fields (Enhanced QAG format)
 analysis = result.get("analysis", {})
 q1_can_complete_alone = analysis.get("q1_can_complete_alone", False) if isinstance(analysis, dict) else False
 q2_different_aspects = analysis.get("q2_different_aspects", False) if isinstance(analysis, dict) else False
 q3_remaining_redundant = analysis.get("q3_remaining_redundant", False) if isinstance(analysis, dict) else False
+q4_abstraction_gap = analysis.get("q4_abstraction_gap", False) if isinstance(analysis, dict) else False

 # Validate strategy value
 if strategy not in ["insert", "replace"]:
@@ -303,6 +304,10 @@
     logging.debug(f"Invalid q3_remaining_redundant {q3_remaining_redundant}, defaulting to False")
     q3_remaining_redundant = False

+if not isinstance(q4_abstraction_gap, bool):
+    logging.debug(f"Invalid q4_abstraction_gap {q4_abstraction_gap}, defaulting to False")
+    q4_abstraction_gap = False
+
 # Validate and limit step count
 valid_steps = []
 if isinstance(steps, list):
@@ -315,7 +320,7 @@

 logging.debug(f"Strategy reason: {reason}")
 if analysis:
-    logging.debug(f"QAG Analysis: q1_can_complete_alone={q1_can_complete_alone}, q2_different_aspects={q2_different_aspects}, q3_remaining_redundant={q3_remaining_redundant}")
+    logging.debug(f"Enhanced QAG Analysis: q1_can_complete_alone={q1_can_complete_alone}, q2_different_aspects={q2_different_aspects}, q3_remaining_redundant={q3_remaining_redundant}, q4_abstraction_gap={q4_abstraction_gap}")

 # Return enhanced result with QAG analysis
 result_data = {
@@ -324,12 +329,13 @@
     "steps": valid_steps
 }

-# Include QAG analysis if provided
+# Include Enhanced QAG analysis if provided
 if analysis:
     result_data["analysis"] = {
         "q1_can_complete_alone": q1_can_complete_alone,
         "q2_different_aspects": q2_different_aspects,
-        "q3_remaining_redundant": q3_remaining_redundant
+        "q3_remaining_redundant": q3_remaining_redundant,
+        "q4_abstraction_gap": q4_abstraction_gap
     }

 return result_data
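For orientation, a response from the LLM in the Enhanced QAG format might look like the following; the field names match the keys read above, while the concrete values, step text, and reason are invented for illustration.

# Hypothetical LLM response in the Enhanced QAG format (illustrative values only).
result = {
    "strategy": "insert",  # must be "insert" or "replace"
    "reason": "The newly revealed filter panel is not exercised by any remaining step.",
    "steps": [{"action": "Open the filter panel and apply one filter"}],
    "analysis": {
        "q1_can_complete_alone": False,
        "q2_different_aspects": True,
        "q3_remaining_redundant": False,
        "q4_abstraction_gap": True,  # new field introduced by this commit
    },
}
# After validation, result_data echoes strategy/reason/steps and, because
# "analysis" is present, carries all four booleans under result_data["analysis"].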
@@ -572,6 +578,7 @@ def extract_path(u):
 failed_steps = []  # Track failed steps for summary generation
 case_modified = False  # Track if case was modified with dynamic steps
 dynamic_generation_count = 0  # Track how many times dynamic generation occurred
+dom_diff_cache = []

 for i, step in enumerate(case.get("steps", [])):
     instruction_to_execute = step.get("action") or step.get("verify")
@@ -692,10 +699,16 @@
 logging.error(f"Step {i+1} failed due to max iterations.")
 break

+# Check for objective achievement signal
+is_achieved, achievement_reason = _is_objective_achieved(tool_output)
+if is_achieved:
+    logging.info(f"Test objective achieved at step {i+1}: {achievement_reason}")
+    final_summary = f"FINAL_SUMMARY: Test case completed successfully with early termination at step {i+1}. {achievement_reason}"
+    break
+
 logging.debug(f"Step {i+1} completed {'successfully' if (i+1) not in failed_steps else 'with issues'}.")

 # --- Dynamic Step Generation ---
-# Check if dynamic step generation is enabled and current step succeeded
 if step_type == "Action":
     # Get dynamic step generation config from state
     dynamic_config = state.get("dynamic_step_generation", {
@@ -712,7 +725,7 @@
 # Extract DOM diff from tool output
 dom_diff = extract_dom_diff_from_output(result['intermediate_steps'][0][1])

-if dom_diff and len(dom_diff) >= min_elements_threshold:
+if dom_diff and len(dom_diff) >= min_elements_threshold and dom_diff not in dom_diff_cache:
     logging.info(f"Detected {len(dom_diff)} new elements, starting dynamic test step generation")

     try:
@@ -817,6 +830,8 @@ def is_similar_step(step1: dict, step2: dict) -> bool:
 logging.debug(f"Detected {len(dom_diff)} new elements, but below threshold {min_elements_threshold}, skipping dynamic step generation")
 else:
     logging.debug("No DOM changes detected, skipping dynamic step generation")
+dom_diff_cache.append(dom_diff)
+
 else:
     logging.debug("Dynamic step generation not enabled")
 # --- Dynamic Step Generation End ---
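Read together, the hunks above gate dynamic generation on diff size and novelty. A minimal sketch of that gate, assuming a dynamic_step_generation config dict with a min_elements_threshold key (the helper name and default value here are invented, not part of the repository):

# Sketch only: mirrors the condition added in this commit, not a helper from the repo.
def should_generate_dynamic_steps(dom_diff, dynamic_config, dom_diff_cache) -> bool:
    min_elements_threshold = dynamic_config.get("min_elements_threshold", 3)  # assumed default
    if not dom_diff:
        return False  # no DOM changes detected
    if len(dom_diff) < min_elements_threshold:
        return False  # too few new elements to be worth generating steps for
    if dom_diff in dom_diff_cache:
        return False  # an identical diff was already handled earlier in this case
    return True

Appending the processed diff to dom_diff_cache is what keeps identical DOM changes from triggering generation more than once within the same case.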
@@ -968,6 +983,32 @@ def is_similar_step(step1: dict, step2: dict) -> bool:
 return result


+def _is_objective_achieved(tool_output: str) -> tuple[bool, str]:
+    """Check if the agent has signaled that the test objective is achieved.
+
+    Args:
+        tool_output: The output from the step execution
+
+    Returns:
+        tuple: (is_achieved: bool, reason: str)
+    """
+    if not tool_output or "OBJECTIVE_ACHIEVED:" not in tool_output:
+        return False, ""
+
+    try:
+        # Extract the reason after the signal
+        parts = tool_output.split("OBJECTIVE_ACHIEVED:")
+        if len(parts) > 1:
+            reason = parts[1].split("\n")[0].strip()
+            # Only return True if there's actual content after the signal
+            if reason:
+                return True, reason
+    except Exception as e:
+        logging.debug(f"Error parsing objective achievement signal: {e}")
+
+    return False, ""
+
+
 def _is_critical_failure_step(tool_output: str, step_instruction: str = "") -> bool:
     """Check if a single step output indicates a critical failure that should stop execution.
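As a usage sketch against the helper above (the step output text is invented), only a signal followed by content on the same line flips the result to True:

# Hypothetical step output containing the early-completion signal.
tool_output = (
    "Assertion passed: result count is visible.\n"
    'OBJECTIVE_ACHIEVED: Test objective "Verify search returns results" completed at step 3. '
    "Remaining 2 steps are redundant. Reason: result list and count were both validated.\n"
)

is_achieved, reason = _is_objective_achieved(tool_output)
# is_achieved -> True; reason is everything after the marker up to the end of that line.

# A bare marker with nothing after the colon is ignored:
assert _is_objective_achieved("OBJECTIVE_ACHIEVED:") == (False, "")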

webqa_agent/testers/case_gen/prompts/agent_prompts.py

Lines changed: 24 additions & 12 deletions
@@ -7,7 +7,6 @@ def get_execute_system_prompt(case: dict) -> str:
 # Core fields (original)
 objective = case.get("objective", "Not specified")
 success_criteria = case.get("success_criteria", ["Not specified"])
-steps_list = case.get("steps", [])

 # Enhanced fields (new)
 priority = case.get("priority", "Medium")
@@ -16,14 +15,6 @@ def get_execute_system_prompt(case: dict) -> str:
 domain_specific_rules = case.get("domain_specific_rules", "")
 test_data_requirements = case.get("test_data_requirements", "")

-# Format step information
-formatted_steps = []
-for i, step in enumerate(steps_list):
-    if "action" in step:
-        formatted_steps.append(f"{i+1}. Action: {step['action']}")
-    elif "verify" in step:
-        formatted_steps.append(f"{i+1}. Assert: {step['verify']}")
-
 system_prompt = f"""You are an intelligent UI test execution agent specialized in web application testing. Your role is to execute individual test cases by performing UI interactions and validations in a systematic, reliable manner following established QA best practices.

 ## Core Mission
@@ -139,7 +130,28 @@ def get_execute_system_prompt(case: dict) -> str:
 - UI state errors: Navigate back to expected state
 4. **Resume test plan** only after successful error resolution

-### 3. Test Plan Adherence (THIRD PRIORITY)
+### 3. Objective Achievement Detection (THIRD PRIORITY)
+**Critical Rule**: After completing each step, evaluate whether the test objective has been fully achieved.
+If the objective is complete and remaining steps would be redundant, signal early completion.
+
+**Objective Achievement Criteria**:
+- All success criteria have been validated through executed actions
+- Core functionality has been thoroughly tested and verified
+- Remaining steps would provide no additional value or coverage
+- The test objective is comprehensively fulfilled based on actual results
+
+**Early Completion Signal Format**:
+When you determine the test objective is achieved, output this exact signal:
+`OBJECTIVE_ACHIEVED: Test objective "[objective]" completed at step [X]. Remaining [Y] steps are redundant. Reason: [detailed explanation of why objective is complete and remaining steps unnecessary].`
+
+**Decision Guidelines**:
+- **Be Conservative**: Only signal when absolutely certain objective is achieved
+- **Evaluate Coverage**: Consider if remaining steps test unique aspects not yet covered
+- **Base on Results**: Evaluate based on actual execution results, not assumptions
+- **Dynamic Context**: This is especially relevant after dynamic steps that may have covered the original test intent
+- **Unique Value Assessment**: Focus on whether remaining steps add genuine testing value
+
+### 4. Test Plan Adherence (FOURTH PRIORITY)
 **Execution Strategy**:
 - Execute test steps in the defined sequence
 - Use appropriate tools based on step type:
@@ -148,8 +160,8 @@ def get_execute_system_prompt(case: dict) -> str:
 - Maintain clear action descriptions for test documentation
 - Track progress through the test plan systematically

-### 4. Test Objective Achievement (FOURTH PRIORITY)
-**Goal-Oriented Execution**:
+### 5. Adaptive Goal Execution (FIFTH PRIORITY)
+**Goal-Oriented Adaptation**:
 - Keep the test objective as the ultimate success criterion
 - If the standard test steps cannot achieve the objective due to UI changes, adapt the approach while maintaining test integrity
 - Document any deviations from the planned approach with clear justification