Commit b99e6ae

feat: add UX verification tool and case recording system (#84)

- Add UIUXViewportTool for visual quality and content accuracy checks
- Implement CentralCaseRecorder for independent test case tracking
- Integrate UX verification into test execution workflow
- Update test runners and agent prompts to support UX testing

Co-authored-by: Sean Liu <[email protected]>

1 parent c704a56 · commit b99e6ae

13 files changed: +1088 −336 lines

webqa_agent/executor/test_runners.py

Lines changed: 98 additions & 103 deletions
@@ -86,128 +86,127 @@ async def run_test(
 
         # Mapping from case name to status obtained from LangGraph aggregate_results
         graph_case_status_map: Dict[str, str] = {}
-
-        # Execute the LangGraph workflow
-        graph_completed = False
-        async for event in graph_app.astream(initial_state, config=graph_config):
-            # Each event is a dict where keys are node names and values are their outputs
-            for node_name, node_output in event.items():
-                if node_name == 'aggregate_results':
-                    # Capture final report to retrieve authoritative case statuses
-                    final_report = node_output.get('final_report', {})
-                    for idx, case_res in enumerate(final_report.get('completed_summary', [])):
-                        case_name = case_res.get('case_name') or case_res.get('name') or f'Case_{idx + 1}'
-                        graph_case_status_map[case_name] = case_res.get('status', 'failed').lower()
-
-                if node_name == '__end__':
-                    logging.debug('Graph execution completed successfully')
-                    graph_completed = True
-                    break
-                else:
-                    logging.debug(f"Node '{node_name}' completed")
-
-            # Break out of the outer loop if we found __end__
-            if graph_completed:
-                break
-
-        # === Use UITester's new data storage mechanism ===
+        recorded_cases_from_graph: List[dict] = []
+
+        # Execute the LangGraph workflow; use ainvoke directly to get the final state
+        final_state = await graph_app.ainvoke(initial_state, config=graph_config)
+
+        # Read recorded_cases from the final state
+        recorded_cases_from_graph = final_state.get('recorded_cases', [])
+        logging.info(f"Retrieved {len(recorded_cases_from_graph)} recorded cases from final graph state")
+
+        # Read completed_cases from the final state for status mapping
+        completed_cases = final_state.get('completed_cases', [])
+        for idx, case_res in enumerate(completed_cases):
+            case_name = case_res.get('case_name') or case_res.get('name') or f'Case_{idx + 1}'
+            graph_case_status_map[case_name] = case_res.get('status', 'failed').lower()
+
+        # === Build test results from the data in recorded_cases ===
         sub_tests = []
-        runner_format_report = {}
-
-        if parallel_tester:
-            # Generate a complete report in the standard runner format
-            test_name = f'UI Agent Test - {target_url}'
-            runner_format_report = parallel_tester.generate_runner_format_report(
-                test_id=test_config.test_id, test_name=test_name
-            )
-
-            sub_tests_data = runner_format_report.get('sub_tests', [])
-            logging.debug(f'Generated runner format report with {len(sub_tests_data)} cases')
-
-            if not sub_tests_data:
-                logging.warning('No sub_tests data found in runner format report')
-
-            # Convert runner-format sub_tests into TestResult.SubTestResult
-            for i, case in enumerate(sub_tests_data):
-                case_name = case.get('name', f"Unnamed test case - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
-                case_steps = case.get('steps', [])
-
+
+        if recorded_cases_from_graph:
+            logging.debug(f'Processing {len(recorded_cases_from_graph)} cases from recorded_cases')
+
+            # Convert recorded_cases into TestResult.SubTestResult
+            for i, recorded_case in enumerate(recorded_cases_from_graph):
+                case_name = recorded_case.get('name', f"Unnamed test case - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+                case_steps_raw = recorded_case.get('steps', [])
+
                 # Validate case data integrity
-                logging.debug(f"Processing case {i + 1}: '{case_name}' with {len(case_steps)} steps")
-                if not case_steps:
+                logging.debug(f"Processing case {i + 1}: '{case_name}' with {len(case_steps_raw)} steps")
+                if not case_steps_raw:
                     logging.warning(f"Case '{case_name}' has no steps data")
-
+
+                # Convert step data into SubTestStep format
+                from webqa_agent.data.test_structures import SubTestStep, SubTestScreenshot, SubTestReport
+
+                case_steps = []
+                for step_data in case_steps_raw:
+                    # Convert screenshot data
+                    screenshots = []
+                    for scr in step_data.get('screenshots', []):
+                        if isinstance(scr, dict) and scr.get('type') == 'base64':
+                            screenshots.append(SubTestScreenshot(type='base64', data=scr.get('data', '')))
+
+                    # Convert status
+                    step_status_str = step_data.get('status', 'passed').lower()
+                    step_status = TestStatus.PASSED
+                    if step_status_str in ['failed', 'error', 'failure']:
+                        step_status = TestStatus.FAILED
+                    elif step_status_str in ['warning', 'warn']:
+                        step_status = TestStatus.WARNING
+
+                    case_steps.append(SubTestStep(
+                        id=step_data.get('id', 0),
+                        description=step_data.get('description', ''),
+                        screenshots=screenshots,
+                        modelIO=step_data.get('modelIO', ''),
+                        actions=step_data.get('actions', []),
+                        status=step_status,
+                    ))
+
+                # Determine the case's overall status
+                case_status_str = recorded_case.get('status', 'failed').lower()
                 # Prefer status from graph aggregation if available
-                sub_status = graph_case_status_map.get(case_name, case.get('status', 'failed')).lower()
+                if case_name in graph_case_status_map:
+                    case_status_str = graph_case_status_map[case_name]
+
                 status_mapping = {
                     'pending': TestStatus.PENDING,
                     'running': TestStatus.RUNNING,
                     'passed': TestStatus.PASSED,
-                    'completed': TestStatus.WARNING,
+                    'completed': TestStatus.PASSED,
+                    'warning': TestStatus.WARNING,
                     'failed': TestStatus.FAILED,
                     'cancelled': TestStatus.CANCELLED,
                 }
-                status_enum = status_mapping.get(sub_status, TestStatus.FAILED)
-
+                status_enum = status_mapping.get(case_status_str, TestStatus.FAILED)
+
+                # Build reports
+                reports = []
+                if recorded_case.get('final_summary'):
+                    reports.append(SubTestReport(title='Summary', issues=recorded_case.get('final_summary', '')))
+
                 sub_tests.append(
                     SubTestResult(
                         name=case_name,
                         status=status_enum,
                         metrics={},
                         steps=case_steps,
-                        messages=case.get('messages', {}),
-                        start_time=case.get('start_time'),
-                        end_time=case.get('end_time'),
-                        final_summary=case.get('final_summary', ''),
-                        report=case.get('report', []),
+                        messages={},  # recorded_cases carries no messages data, so use an empty dict
+                        start_time=recorded_case.get('start_time'),
+                        end_time=recorded_case.get('end_time'),
+                        final_summary=recorded_case.get('final_summary', ''),
+                        report=reports,
                     )
                 )
-
+
             result.sub_tests = sub_tests
-
-            # Extract summary metrics from the runner-format report
-            results_data = runner_format_report.get('results', {})
-            result.add_metric('test_case_count', results_data.get('total_cases', 0))
-            result.add_metric('passed_test_cases', results_data.get('passed_cases', 0))
-            result.add_metric('failed_test_cases', results_data.get('failed_cases', 0))
-            result.add_metric('total_steps', results_data.get('total_steps', 0))
-            result.add_metric('success_rate', results_data.get('success_rate', 0))
-
-            # Extract and aggregate network and console data from each case's messages
-            total_failed_requests = 0
-            total_requests = 0
-            total_console_errors = 0
-
-            for case in runner_format_report.get('sub_tests', []):
-                case_messages = case.get('messages', {})
-                if isinstance(case_messages, dict):
-                    network_data = case_messages.get('network', {})
-                    if isinstance(network_data, dict):
-                        failed_requests = network_data.get('failed_requests', [])
-                        responses = network_data.get('responses', [])
-                        total_failed_requests += len(failed_requests)
-                        total_requests += len(responses)
-
-                    console_data = case_messages.get('console', [])
-                    if isinstance(console_data, list):
-                        total_console_errors += len(console_data)
-
-            result.add_metric('network_failed_requests_count', total_failed_requests)
-            result.add_metric('network_total_requests_count', total_requests)
-            result.add_metric('console_error_count', total_console_errors)
-
+
+            # Compute summary metrics
+            total_cases = len(recorded_cases_from_graph)
+            passed_cases = sum(1 for case in recorded_cases_from_graph if case.get('status', '').lower() in ['passed', 'completed'])
+            failed_cases = total_cases - passed_cases
+            total_steps = sum(len(case.get('steps', [])) for case in recorded_cases_from_graph)
+            success_rate = (passed_cases / total_cases * 100) if total_cases > 0 else 0
+
+            result.add_metric('test_case_count', total_cases)
+            result.add_metric('passed_test_cases', passed_cases)
+            result.add_metric('failed_test_cases', failed_cases)
+            result.add_metric('total_steps', total_steps)
+            result.add_metric('success_rate', success_rate)
+
             # Set overall status
-            runner_status = runner_format_report.get('status', 'failed')
-            if runner_status == 'completed':
+            if failed_cases == 0:
                 result.status = TestStatus.PASSED
             else:
                 result.status = TestStatus.FAILED
-                result.error_message = runner_format_report.get('error_message', 'Test execution failed')
-
+                result.error_message = f'{failed_cases} out of {total_cases} test cases failed'
+
         else:
-            logging.error('No UITester instance available for data extraction')
+            logging.error('No recorded_cases data found in graph state')
             result.status = TestStatus.FAILED
-            result.error_message = 'No test cases were executed or results were not available'
+            result.error_message = 'No test cases were executed or recorded_cases data was not available'
 
         logging.info(f"{icon['check']} Test completed: {test_config.test_name}")
 
@@ -219,14 +218,10 @@ async def run_test(
             raise
 
     finally:
-        # Cleanup parallel tester
+        # Note: browser monitoring data collection is already handled in the main flow,
+        # so no additional cleanup is needed here
        if parallel_tester:
-            try:
-                # UITester now manages monitoring data automatically; we only need to clean up resources
-                await parallel_tester.cleanup()
-                logging.debug('UITester cleanup completed')
-            except Exception as e:
-                logging.error(f'Error cleaning up UITester: {e}')
+            logging.debug('UITester cleanup completed in main flow')
 
     return result
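The behavioral change in this file, in isolation: instead of watching `astream` events for the `__end__` node, the runner now calls `ainvoke` once and reads `recorded_cases` and `completed_cases` off the final state. A minimal sketch of that consumption pattern, assuming a compiled LangGraph app whose state carries the key names shown in the diff above:

```python
# Sketch only: `graph_app` is any compiled LangGraph application whose final
# state carries 'recorded_cases' and 'completed_cases' (key names from this
# commit); everything else here is illustrative.
from typing import Dict, List, Tuple


async def collect_results(graph_app, initial_state: dict, graph_config: dict) -> Tuple[List[dict], Dict[str, str]]:
    # ainvoke runs the graph to completion and returns the final state,
    # replacing the old astream loop that watched for '__end__'.
    final_state = await graph_app.ainvoke(initial_state, config=graph_config)

    recorded_cases: List[dict] = final_state.get('recorded_cases', [])

    # Per-case statuses from the aggregation step take precedence.
    status_map: Dict[str, str] = {}
    for idx, case in enumerate(final_state.get('completed_cases', [])):
        name = case.get('case_name') or case.get('name') or f'Case_{idx + 1}'
        status_map[name] = case.get('status', 'failed').lower()

    return recorded_cases, status_map
```

One consequence of this switch: intermediate node outputs are no longer observed, so any progress logging has to come from the nodes themselves.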

webqa_agent/llm/llm_api.py

Lines changed: 3 additions & 2 deletions
@@ -21,7 +21,7 @@ async def initialize(self):
             self.base_url = self.llm_config.get("base_url")
             # Use AsyncOpenAI client for async operations
             self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, timeout=60) if self.base_url else AsyncOpenAI(
-                api_key=self.api_key, timeout=60)
+                api_key=self.api_key, timeout=360)
             logging.debug(f"AsyncOpenAI client initialized with API key: {self.api_key}, Model: {self.model} and base URL: {self.base_url}")
         else:
             raise ValueError("Invalid API type or missing credentials. LLM client not initialized.")
@@ -96,7 +96,8 @@ async def _call_openai(self, messages, temperature=None, top_p=None, max_tokens=
         create_kwargs = {
             "model": actual_model,
             "messages": messages,
-            "timeout": 60,
+            "timeout": 360,
+            "max_tokens": 16000,
         }
         # Always send user/configured temperature when provided (default handled upstream)
         if temperature is not None:
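Taken together, the two hunks raise the timeout for the fallback (no base URL) client from 60 s to 360 s and cap completions at 16,000 tokens. A standalone sketch of the resulting call, using the public `openai` async API; the key, model name, and prompt below are placeholders, not values from this commit:

```python
# Illustrates the new timeout/max_tokens settings only; the API key, model
# name, and message content are placeholders.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI(api_key="sk-...", timeout=360)  # fallback client, raised from 60
    resp = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "ping"}],
        timeout=360,        # per-request timeout, raised from 60
        max_tokens=16000,   # newly added completion cap
    )
    print(resp.choices[0].message.content)


asyncio.run(main())
```

Note that the branch where `base_url` is set still constructs its client with `timeout=60`; only the fallback constructor and the per-request `create` kwargs changed, and the per-request timeout overrides the client default.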

webqa_agent/llm/prompt.py

Lines changed: 0 additions & 84 deletions
@@ -538,90 +538,6 @@ class LLMPrompt:
 
     """
 
-    # New: Test case generation prompts
-    case_generator_system_prompt = """
-    ## Role
-    You are an expert UI test case generator. Your task is to analyze a webpage and user requirements, then generate comprehensive test cases that thoroughly validate the functionality.
-
-    ## Objective
-    Based on the provided webpage HTML/structure and user requirements, you need to:
-    1. **Understand the webpage structure** and identify key interactive elements
-    2. **Analyze user requirements** to understand what functionality needs to be tested
-    3. **Generate comprehensive test steps** that cover the main user workflow
-    4. **Include appropriate validations** to ensure the functionality works correctly
-    5. **Consider edge cases** and error scenarios when applicable
-
-    ## Test Case Structure
-    Each test case should include:
-    - **name**: A descriptive name for the test case
-    - **steps**: A list of actions and validations
-    - **objective**: What the test case aims to validate
-
-    ## Available Action Types
-    - **action**: Execute an action instruction (click, type, scroll, wait, drag, upload, keyboardPress etc.)
-    - **verify**: Verify expected outcomes or states
-
-    ## Guidelines
-    1. **Logical Flow**: Ensure test steps follow a logical user workflow
-    2. **Comprehensive Coverage**: Test main functionality, edge cases, and error scenarios
-    3. **Clear Validations**: Each test should include proper assertions to verify success
-    4. **Realistic User Behavior**: Steps should mimic real user interactions
-    5. **Wait Times**: Include appropriate wait times for dynamic content
-    6. **File Uploads**: When testing file upload, use appropriate file paths and wait times
-    7. **Navigation**: Test page navigation and state changes
-    8. **Error Handling**: Include tests for error scenarios when applicable
-
-    ## Test Case Categories to Consider
-    - **Core Functionality**: Main features and workflows
-    - **User Interaction**: Form submissions, button clicks, navigation
-    - **Data Validation**: Input validation, error messages
-    - **Dynamic Content**: Loading states, real-time updates
-    - **File Operations**: Upload, download, preview
-    - **Responsive Behavior**: Different screen sizes and devices
-    - **Error Scenarios**: Invalid inputs, network issues, permission errors
-
-    ## Output Format
-    Return a JSON object with the following structure:
-    ```json
-    {
-        "test_cases": [
-            {
-                "name": "descriptive_test_name",
-                "objective": "what this test validates",
-                "steps": [
-                    {"action": "action instruction"},
-                    {"verify": "validation instruction"},
-                    ...
-                ]
-            }
-        ]
-    }
-    ```
-    """
-
-    case_generator_output_prompt = """
-    ## Task: Generate Comprehensive Test Cases
-
-    Based on the provided webpage structure and user requirements, generate detailed test cases that thoroughly validate the functionality.
-
-    ### Webpage Analysis
-    Please analyze the page structure and identify:
-    1. **Interactive Elements**: buttons, forms, links, inputs, etc.
-    2. **Key Features**: main functionalities exposed by the UI
-    3. **User Workflows**: typical user journeys through the interface
-    4. **Validation Points**: where success/failure can be measured
-
-    ### Test Case Generation Rules
-    1. **Start with Basic Flow**: Begin with the most common user workflow
-    2. **Add Edge Cases**: Include boundary conditions and error scenarios
-    3. **Include Proper Waits**: Add appropriate wait times for dynamic content
-    4. **Validate Each Step**: Include assertions to verify expected outcomes
-    5. **Use Realistic Data**: Include realistic test data and file paths
-    6. **Consider User Experience**: Test from an end-user perspective
-
-    Generate comprehensive test cases in the specified JSON format. **Do not include code blocks in the output**
-    """
-
     page_default_prompt = """
     You are a web content quality inspector. You need to carefully read the text content of the webpage and complete the task based on the user's test objective. Please ensure that the output JSON format does not contain any code blocks or backticks.
     """

webqa_agent/testers/case_gen/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -11,9 +11,10 @@
 from .graph import app as langgraph_app
 from .state.schemas import MainGraphState
 from .tools.element_action_tool import UIAssertTool, UITool
+from .tools.ux_tool import UIUXViewportTool
 
 # Version info
 __version__ = "1.0.0"
 
 # Make key components available at package level
-__all__ = ["langgraph_app", "MainGraphState", "agent_worker_node", "UITool", "UIAssertTool"]
+__all__ = ["langgraph_app", "MainGraphState", "agent_worker_node", "UITool", "UIAssertTool", "UIUXViewportTool"]
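With the new export, the UX tool is importable from the package root alongside the existing tools. A small usage sketch; the tool's constructor and invocation live in `tools/ux_tool.py`, which is not shown in this excerpt:

```python
# Only the import surface added by this commit is illustrated; how
# UIUXViewportTool is configured or invoked is defined in tools/ux_tool.py.
from webqa_agent.testers.case_gen import UIAssertTool, UITool, UIUXViewportTool

print([t.__name__ for t in (UITool, UIAssertTool, UIUXViewportTool)])
```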
