Pass success criteria and per-step execution history to context-aware verification

seancoding-day · seancoding-day · commit b686a5dc84dc · 2025-11-17T16:45:18.000+08:00
diff --git a/webqa_agent/testers/case_gen/graph.py b/webqa_agent/testers/case_gen/graph.py
@@ -495,6 +495,12 @@ async def execute_single_case(state: MainGraphState) -> dict:
     ui_tester_instance = state["ui_tester_instance"]
     case_name = case.get("name")
 
+    # Set test context for context-aware verification
+    ui_tester_instance.current_test_objective = case.get("objective", case.get("name"))
+    ui_tester_instance.current_success_criteria = case.get("success_criteria", [])
+    # Clear old execution history to avoid cross-case pollution
+    ui_tester_instance.execution_history.clear()
+
     language = state.get('language', 'zh-CN')
     logging.debug(f"Execute case language: {language}")
     default_text = '智能功能测试' if language == 'zh-CN' else 'AI Function Test'
diff --git a/webqa_agent/testers/case_gen/tools/element_action_tool.py b/webqa_agent/testers/case_gen/tools/element_action_tool.py
@@ -446,9 +446,19 @@ async def _arun(self, assertion: str) -> str:
             # Build execution context from instance state (context-aware verification)
             execution_context = None
             if self.ui_tester_instance.last_action_context:
+                # Build complete execution context with all 5 expected fields
                 execution_context = {
                     "last_action": self.ui_tester_instance.last_action_context,
                     "test_objective": self.ui_tester_instance.current_test_objective,
+                    "success_criteria": self.ui_tester_instance.current_success_criteria,
+                    "completed_steps": [
+                        h for h in self.ui_tester_instance.execution_history
+                        if h.get("success") is True  # Use strict comparison to avoid None misclassification
+                    ],
+                    "failed_steps": [
+                        h for h in self.ui_tester_instance.execution_history
+                        if h.get("success") is False  # Use strict comparison to avoid None misclassification
+                    ]
                 }
                 logging.debug("Passing execution context to verify()")
 
diff --git a/webqa_agent/testers/function_tester.py b/webqa_agent/testers/function_tester.py
@@ -47,6 +47,7 @@ def __init__(self, llm_config: Dict[str, Any], browser_session: BrowserSession =
         self.last_action_context: Optional[Dict[str, Any]] = None
         self.execution_history: List[Dict[str, Any]] = []
         self.current_test_objective: Optional[str] = None
+        self.current_success_criteria: List[str] = []  # Store test success criteria
 
     async def initialize(self, browser_session: BrowserSession = None):
         if browser_session:
@@ -207,6 +208,15 @@ async def action(self, test_step: str, file_path: str = None) -> Tuple[Dict[str,
             # Automatically store step data
             self.add_step_data(execution_steps_dict, step_type="action")
 
+            # Update execution history for context-aware verification
+            self.execution_history.append({
+                "description": test_step,
+                "success": execution_result.get("success"),
+                "timestamp": end_time,
+                "dom_diff": diff_elems,
+                "actions": execution_steps
+            })
+
             return execution_steps_dict, execution_result
 
         except Exception as e:
@@ -233,6 +243,14 @@ async def action(self, test_step: str, file_path: str = None) -> Tuple[Dict[str,
                 "end_time": end_time,
             }
 
+            self.execution_history.append({
+                "description": test_step,
+                "success": False,
+                "timestamp": end_time,
+                "dom_diff": {},
+                "actions": []
+            })
+
             # Automatically store error step data
             self.add_step_data(error_execution_steps, step_type="action")