Skip to content

Commit 8322704

Browse files
feat: 1. add schema for action tool and verify tool; 2. synchronize actions with action_executor.py; 3. update crawl parameters
1 parent 3d5a8f0 commit 8322704

File tree

4 files changed

+137
-26
lines changed

4 files changed

+137
-26
lines changed

webqa_agent/actions/action_executor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ async def _execute_keyboard_press(self, action):
145145
else:
146146
return {"success": False, "message": "Keyboard press failed."}
147147

148-
async def _execute_get_new_page(self, action):
148+
async def _execute_get_new_page(self):
149149
"""Execute get new page action."""
150150
success = await self._actions.get_new_page()
151151
if success:
@@ -313,7 +313,7 @@ async def _execute_go_to_page(self, action):
313313
logging.error(f"Go to page action failed: {str(e)}")
314314
return {"success": False, "message": f"Navigation failed: {str(e)}", "playwright_error": str(e)}
315315

316-
async def _execute_go_back(self, action):
316+
async def _execute_go_back(self):
317317
"""Execute browser back navigation action."""
318318
try:
319319
if hasattr(self._actions, 'go_back'):

webqa_agent/testers/case_gen/graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ async def reflect_and_replan(state: MainGraphState) -> dict:
296296
logging.debug(f"current page crawled result: {page_content_summary}")
297297
screenshot = await ui_tester._actions.b64_page_screenshot(file_name="reflection", save_to_log=False, full_page=False)
298298
await dp.remove_marker()
299-
await dp.crawl(highlight=False, highlight_text=True, viewport_only=True)
299+
await dp.crawl(highlight=False, filter_text=True, viewport_only=True)
300300
page_structure = dp.get_text()
301301
logging.debug(f"----- reflection ---- Page structure: {page_structure}")
302302

webqa_agent/testers/case_gen/prompts/agent_prompts.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def get_execute_system_prompt(case: dict) -> str:
3232
3333
- **`execute_ui_action(action: str, target: str, value: Optional[str], description: Optional[str], clear_before_type: bool)`**:
3434
Performs UI interactions such as clicking, typing, scrolling, dropdown selection, etc.
35-
- `action`: Action type ('click', 'type', 'scroll', 'SelectDropdown', 'clear', etc.)
35+
- `action`: Action type ('Tap', 'Input', 'Scroll', 'SelectDropdown', 'Clear', 'Hover', 'KeyboardPress', 'Upload', 'Drag', 'GoToPage', 'GoBack', 'Sleep', 'GetNewPage', 'Mouse')
3636
- `target`: Element descriptor (use natural language descriptions)
3737
- `value`: Input value for text-based actions
3838
- `description`: Purpose of the action for logging and context
@@ -285,9 +285,9 @@ def get_execute_system_prompt(case: dict) -> str:
285285
286286
### Example 1: Form Field Validation Recovery
287287
**Context**: Registration form with character length requirements
288-
**Initial Action**: `execute_ui_action(action='type', target='usage scenario field', value='test', description='Enter usage scenario')`
288+
**Initial Action**: `execute_ui_action(action='Input', target='usage scenario field', value='test', description='Enter usage scenario')`
289289
**Tool Response**: `[FAILURE] Validation error detected: Usage scenario must be at least 30 characters`
290-
**Recovery Action**: `execute_ui_action(action='type', target='usage scenario field', value='This is a comprehensive usage scenario description for research and development purposes in academic and commercial settings', description='Enter extended usage scenario meeting length requirements', clear_before_type=True)`
290+
**Recovery Action**: `execute_ui_action(action='Input', target='usage scenario field', value='This is a comprehensive usage scenario description for research and development purposes in academic and commercial settings', description='Enter extended usage scenario meeting length requirements', clear_before_type=True)`
291291
292292
### Example 2: Dropdown Language Adaptation
293293
**Context**: Bilingual interface with Chinese dropdown options
@@ -297,37 +297,37 @@ def get_execute_system_prompt(case: dict) -> str:
297297
298298
### Example 3: Dynamic Content Waiting
299299
**Context**: API-populated dropdown requiring wait time
300-
**Step 1**: `execute_ui_action(action='click', target='country dropdown', description='Open country selection dropdown')`
300+
**Step 1**: `execute_ui_action(action='Tap', target='country dropdown', description='Open country selection dropdown')`
301301
**Tool Response**: `[SUCCESS] Dropdown opened, loading options...`
302-
**Step 2**: `execute_ui_action(action='sleep', target='', value='2000', description='Wait for options to load')`
303-
**Step 3**: `execute_ui_action(action='click', target='option containing "Canada"', description='Select Canada from loaded options')`
302+
**Step 2**: `execute_ui_action(action='Sleep', target='', value='2000', description='Wait for options to load')`
303+
**Step 3**: `execute_ui_action(action='Tap', target='option containing "Canada"', description='Select Canada from loaded options')`
304304
305305
### Example 4: Element State Change Handling
306306
**Context**: Button state change after interaction
307-
**Initial Action**: `execute_ui_action(action='click', target='submit button', description='Submit form')`
307+
**Initial Action**: `execute_ui_action(action='Tap', target='submit button', description='Submit form')`
308308
**Tool Response**: `[SUCCESS] Form submitted, button disabled and showing 'Processing...'`
309-
**Recovery Action**: `execute_ui_action(action='wait', target='', value='3000', description='Wait for processing to complete')`
309+
**Recovery Action**: `execute_ui_action(action='Sleep', target='', value='3000', description='Wait for processing to complete')`
310310
**Follow-up**: `execute_ui_assertion(assertion='Verify success message appears and button returns to normal state')`
311311
312312
### Example 5: Multi-Action Instruction Handling
313313
**Context**: Instruction contains multiple actions "Browse the homepage top navigation bar, click one by one: 'Visitor', 'Alumni', 'Donate', 'Careers' links"
314314
**First Action Identification**: The first mentioned action is "Visitor" link
315-
**Correct Agent Response**: Execute only the FIRST action - `execute_ui_action(action='click', target='Visitor link', description='Click the visitor link in the top navigation bar')`
316-
**Tool Response**: `[SUCCESS] Action 'click' on 'Visitor link' completed successfully`
315+
**Correct Agent Response**: Execute only the FIRST action - `execute_ui_action(action='Tap', target='Visitor link', description='Click the visitor link in the top navigation bar')`
316+
**Tool Response**: `[SUCCESS] Action 'Tap' on 'Visitor link' completed successfully`
317317
**Agent Reporting**: Report completion of the single action and allow framework to proceed to next step
318318
319319
### Example 6: Another Multi-Action Instruction Handling
320320
**Context**: Instruction contains "Click on the 'Login', 'Register', and 'Help' links in the header"
321321
**First Action Identification**: The first mentioned action is "Login" link
322-
**Correct Agent Response**: Execute only the FIRST action - `execute_ui_action(action='click', target='Login link', description='Click the Login link in the header')`
323-
**Tool Response**: `[SUCCESS] Action 'click' on 'Login link' completed successfully`
322+
**Correct Agent Response**: Execute only the FIRST action - `execute_ui_action(action='Tap', target='Login link', description='Click the Login link in the header')`
323+
**Tool Response**: `[SUCCESS] Action 'Tap' on 'Login link' completed successfully`
324324
**Agent Reporting**: Report completion of the single action and allow framework to proceed to next step
325325
326326
### Example 7: Numbered List Multi-Action Handling
327327
**Context**: Instruction contains "1. Enter username 2. Enter password 3. Click submit"
328328
**First Action Identification**: The numbered step #1 is "Enter username"
329-
**Correct Agent Response**: Execute only the FIRST action - `execute_ui_action(action='type', target='username field', value='testuser', description='Enter username in the username field')`
330-
**Tool Response**: `[SUCCESS] Action 'type' on 'username field' completed successfully`
329+
**Correct Agent Response**: Execute only the FIRST action - `execute_ui_action(action='Input', target='username field', value='testuser', description='Enter username in the username field')`
330+
**Tool Response**: `[SUCCESS] Action 'Input' on 'username field' completed successfully`
331331
**Agent Reporting**: Report completion of the single action and allow framework to proceed to next step
332332
333333
## Test Completion Protocol

webqa_agent/testers/case_gen/tools/element_action_tool.py

Lines changed: 120 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,88 @@
77
import datetime
88
import json
99
import logging
10-
from typing import Any, Dict, Optional
10+
from typing import Optional, Type
1111

1212
from langchain_core.tools import BaseTool
13-
from pydantic import Field
13+
from pydantic import BaseModel, Field
1414

1515
from webqa_agent.crawler.deep_crawler import DeepCrawler
16-
from webqa_agent.testers.case_gen.prompts.tool_prompts import get_error_detection_prompt
1716
from webqa_agent.testers.function_tester import UITester
1817

1918

19+
class UIActionSchema(BaseModel):
    """Argument schema for the ``execute_ui_action`` tool.

    This model is assigned as ``args_schema`` on ``UITool``; the ``Field``
    descriptions therefore act as the usage instructions for each argument.

    Attributes:
        action: Name of the UI action to perform (``'Tap'``, ``'Input'``, ...).
        target: What to act on: a natural-language element description, or
            the URL for ``'GoToPage'``.
        value: Optional payload whose meaning depends on ``action``.
        description: Optional free-text purpose of the action.
        clear_before_type: Whether an ``'Input'`` action clears the field
            before typing.
    """

    action: str = Field(
        description=(
            "Type of UI action to perform. Supported actions: "
            "'Tap' - Click on an element; "
            "'Input' - Type text into an input field; "
            "'SelectDropdown' - Select an option from a dropdown menu (supports cascade selection with comma-separated paths); "
            "'Scroll' - Scroll the page with configurable modes ('once', 'untilBottom', 'untilTop') and optional distance; "
            "'Clear' - Clear the content of an input field; "
            "'Hover' - Hover over an element; "
            "'KeyboardPress' - Press a keyboard key; "
            "'Upload' - Upload a file; "
            "'Drag' - Drag an element to a target position; "
            "'GoToPage' - Navigate to a URL; "
            "'GoBack' - Navigate back to the previous page; "
            "'Sleep' - Wait for a specified duration; "
            "'GetNewPage' - Switch to a new tab or window; "
            "'Mouse' - Move mouse cursor or scroll mouse wheel."
        )
    )

    # The tool interpolates ``target`` into a natural-language instruction
    # (e.g. "Click on the {target}") and the system prompt asks for
    # natural-language element descriptors, so the description says the same
    # while still permitting element IDs.
    target: str = Field(
        description=(
            "Element to act on, described in natural language "
            "(e.g. 'submit button', 'username field'). "
            "An element ID from the page description may also be used. "
            "For Scroll actions, this can be a scroll target description. "
            "For GoToPage action, this should be the URL."
        )
    )

    value: Optional[str] = Field(
        default=None,
        description=(
            "Value to use for the action. "
            "Required for 'Input' action (text to type), "
            "'SelectDropdown' action (option text or comma-separated cascade path like 'Category,Subcategory,Item'), "
            "'Scroll' action (direction 'up' or 'down', with optional scrollType and distance description), "
            "'KeyboardPress' action (key name like 'Enter', 'Tab', 'Escape', etc.), "
            "'Upload' action (file path), "
            "'Sleep' action (duration in milliseconds), "
            "'Mouse' action (operation type: 'move' for cursor positioning or 'wheel' for scrolling). "
            "Optional for 'Drag' action (target position description), "
            "'GetNewPage' action (tab/window identifier). "
            "Optional for other actions."
        )
    )

    description: Optional[str] = Field(
        default=None,
        description=(
            "Optional custom description of what this action is intended to do. "
            "Helps provide context for the action in test reports."
        )
    )

    clear_before_type: bool = Field(
        default=False,
        description=(
            "Whether to clear the input field before typing. "
            "Only applicable for 'Input' action. "
            "Set to True to clear existing content before typing new text."
        )
    )
85+
2086
class UITool(BaseTool):
2187
"""A tool to interact with a UI via a UITester instance."""
2288

2389
name: str = "execute_ui_action"
2490
description: str = "Executes a UI action using the UITester and returns a structured summary of the new page state."
91+
args_schema: Type[BaseModel] = UIActionSchema
2592
ui_tester_instance: UITester = Field(...)
2693

2794
async def get_full_page_context(
@@ -36,7 +103,7 @@ async def get_full_page_context(
36103
logging.debug(f"Retrieving page context for analysis (viewport_only={viewport_only})")
37104
page = self.ui_tester_instance.driver.get_page()
38105
dp = DeepCrawler(page)
39-
await dp.crawl(highlight=True, highlight_text=True, viewport_only=viewport_only)
106+
await dp.crawl(highlight=True, filter_text=True, viewport_only=viewport_only)
40107
page_structure = dp.get_text()
41108

42109
screenshot = None
@@ -77,20 +144,47 @@ async def _arun(
77144
logging.debug(f"Using custom description: {description}")
78145

79146
# Build the action phrase
80-
if action.lower() == "click":
147+
if action == "Tap":
81148
action_phrase = f"Click on the {target}"
82-
elif action.lower() == "type":
149+
elif action == "Input":
83150
if clear_before_type:
84151
action_phrase = f"Clear the {target} field and then type '{value}'"
85152
logging.debug("Using clear-before-type strategy")
86153
else:
87154
action_phrase = f"Type '{value}' in the {target}"
88-
elif action.lower() == "selectdropdown":
155+
elif action == "SelectDropdown":
89156
action_phrase = f"From the {target}, select the option '{value}'"
90-
elif action.lower() == "scroll":
157+
elif action == "Scroll":
91158
action_phrase = f"Scroll {value or 'down'} on the page"
92-
elif action.lower() == "clear":
159+
elif action == "Clear":
93160
action_phrase = f"Clear the content of {target}"
161+
elif action == "Hover":
162+
action_phrase = f"Hover over {target}"
163+
elif action == "KeyboardPress":
164+
action_phrase = f"Press the {value} key"
165+
elif action == "Upload":
166+
action_phrase = f"Upload file {value} to {target}"
167+
elif action == "Drag":
168+
action_phrase = f"Drag {target}"
169+
if value:
170+
action_phrase += f" to {value}"
171+
elif action == "GoToPage":
172+
action_phrase = f"Navigate to {target}"
173+
elif action == "GoBack":
174+
action_phrase = f"Navigate back to the previous page"
175+
elif action == "Sleep":
176+
action_phrase = f"Wait for {value or '1000'} milliseconds"
177+
elif action == "GetNewPage":
178+
action_phrase = f"Switch to new page/tab"
179+
if value:
180+
action_phrase += f" {value}"
181+
elif action == "Mouse":
182+
if value and 'move' in value.lower():
183+
action_phrase = f"Move mouse cursor to {target}"
184+
elif value and 'wheel' in value.lower():
185+
action_phrase = f"Scroll mouse wheel on {target}"
186+
else:
187+
action_phrase = f"Perform mouse action on {target}"
94188
else:
95189
action_phrase = f"{action} on {target}"
96190
if value:
@@ -168,11 +262,28 @@ async def _arun(
168262
return f"[FAILURE] {error_msg}"
169263

170264

265+
class UIAssertionSchema(BaseModel):
    """Argument schema for the ``execute_ui_assertion`` tool.

    Attributes:
        assertion: A single, specific statement about the current page state
            that the tool should verify.
    """

    assertion: str = Field(
        description=(
            "The assertion or validation to perform on the current page state. "
            "Should be a clear, specific statement of what to verify. "
            "Examples: "
            + ", ".join(
                [
                    "'The login button should be visible'",
                    '\'The error message should contain the text "Invalid credentials"\'',
                    '\'The page title should be "Dashboard"\'',
                    "'There should be 5 items in the shopping cart'",
                ]
            )
            + "."
        )
    )
279+
280+
171281
class UIAssertTool(BaseTool):
172282
"""A tool to perform UI assertions via a UITester instance."""
173283

174284
name: str = "execute_ui_assertion"
175285
description: str = "Performs a UI assertion/validation using the UITester and returns the verification result."
286+
args_schema: Type[BaseModel] = UIAssertionSchema
176287
ui_tester_instance: UITester = Field(...)
177288

178289
def _run(self, assertion: str) -> str:

0 commit comments

Comments
 (0)