Skip to content

Commit ca2a84b

Browse files
feat: 1. add switch for saving screenshots to local disk; 2. add context for verify stage
fix: 1. remove deprecated prompt in action stage; 2. abort steps and cases when unable to crawl the current page
1 parent 8e126ce commit ca2a84b

File tree

13 files changed

+712
-169
lines changed

13 files changed

+712
-169
lines changed

app_gradio/demo_gradio.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ def create_config_dict(
201201
"viewport": {"width": 1280, "height": 720},
202202
"headless": True,
203203
"language": "zh-CN",
204-
"cookies": []
204+
"cookies": [],
205+
"save_screenshots": False # Gradio demo does not save screenshots to disk
205206
}
206207
}
207208

@@ -273,6 +274,11 @@ def build_test_configurations(config: Dict[str, Any]) -> list:
273274
async def run_webqa_test(config: Dict[str, Any], lang: str = "zh-CN") -> Tuple[Optional[str], Optional[str], Optional[str]]:
274275
"""Run WebQA test"""
275276
try:
277+
# Configure screenshot saving behavior
278+
from webqa_agent.actions.action_handler import ActionHandler
279+
save_screenshots = config.get("browser_config", {}).get("save_screenshots", False)
280+
ActionHandler.set_screenshot_config(save_screenshots=save_screenshots)
281+
276282
# Validate LLM configuration
277283
llm_config = {
278284
"api": "openai",

config/config.yaml.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ browser_config:
3333
headless: False # Docker environment will automatically override to True
3434
language: zh-CN
3535
cookies: []
36+
save_screenshots: False # Whether to save screenshots to local disk (default: False)
3637

3738
report:
3839
language: en-US # zh-CN, en-US

webqa-agent.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,15 @@ async def run_tests(cfg):
288288
if is_docker:
289289
print("🐳 Docker mode: automatically enable headless browser")
290290

291+
# 0.1. Configure screenshot saving behavior
292+
from webqa_agent.actions.action_handler import ActionHandler
293+
save_screenshots = cfg.get("browser_config", {}).get("save_screenshots", False)
294+
ActionHandler.set_screenshot_config(save_screenshots=save_screenshots)
295+
if not save_screenshots:
296+
print("📸 Screenshot saving: disabled (screenshots will be captured but not saved to disk)")
297+
else:
298+
print("📸 Screenshot saving: enabled")
299+
291300
# 1. Check required tools based on configuration
292301
tconf = cfg.get("test_config", {})
293302

webqa_agent/actions/action_handler.py

Lines changed: 142 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import base64
2+
import datetime
23
import json
34
import os
45
import re
56
from contextvars import ContextVar
67
from dataclasses import dataclass, field
8+
from pathlib import Path
79
from typing import Any, Dict, List, Optional, Union
810

911
from playwright.async_api import Page
@@ -64,6 +66,40 @@ def reset(self):
6466

6567

6668
class ActionHandler:
69+
# Session management for screenshot organization
70+
_screenshot_session_dir: Optional[Path] = None
71+
_screenshot_session_timestamp: Optional[str] = None
72+
_save_screenshots: bool = False # Default: do not save screenshots to disk
73+
74+
@classmethod
75+
def set_screenshot_config(cls, save_screenshots: bool = False):
76+
"""Set global screenshot saving behavior.
77+
78+
Args:
79+
save_screenshots: Whether to save screenshots to local disk (default: False)
80+
"""
81+
cls._save_screenshots = save_screenshots
82+
logging.debug(f"Screenshot saving config set to: {save_screenshots}")
83+
84+
@classmethod
85+
def init_screenshot_session(cls) -> Path:
86+
"""Initialize screenshot session directory for this test run.
87+
88+
Creates a timestamped directory under webqa_agent/crawler/screenshots/
89+
for organizing all screenshots from a single test session.
90+
91+
Returns:
92+
Path: The session directory path
93+
"""
94+
if cls._screenshot_session_dir is None:
95+
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
96+
base_dir = Path(__file__).parent.parent / "crawler" / "screenshots"
97+
cls._screenshot_session_dir = base_dir / timestamp
98+
cls._screenshot_session_timestamp = timestamp
99+
cls._screenshot_session_dir.mkdir(parents=True, exist_ok=True)
100+
logging.info(f"Initialized screenshot session directory: {cls._screenshot_session_dir}")
101+
return cls._screenshot_session_dir
102+
67103
def __init__(self):
68104
self.page_data = {}
69105
self.page_element_buffer = {} # page element buffer
@@ -348,12 +384,18 @@ async def ensure_element_in_viewport(self, element_id: str, max_retries: int = 3
348384
"""
349385
# Get current active page
350386
page = self._get_current_page()
351-
352-
# Initialize action context for error propagation
353-
ctx = ActionContext()
354-
action_context_var.set(ctx)
387+
388+
# Get existing context or create new one (preserves parent context)
389+
ctx = action_context_var.get()
390+
if ctx is None:
391+
ctx = ActionContext()
392+
action_context_var.set(ctx)
393+
394+
# Update scroll-specific context info
355395
ctx.max_scroll_attempts = max_retries
356-
ctx.element_info = {"element_id": element_id, "action": "ensure_viewport"}
396+
# Only set element_info if not already set by parent method
397+
if not ctx.element_info.get("element_id"):
398+
ctx.element_info = {"element_id": element_id, "action": "ensure_viewport"}
357399

358400
element = self.page_element_buffer.get(str(element_id))
359401
if not element:
@@ -924,9 +966,11 @@ async def wait(self, timeMs) -> bool:
924966
async def type(self, id, text, clear_before_type: bool = False) -> bool:
925967
"""Types text into the specified element, optionally clearing it
926968
first."""
927-
# Initialize action context for error propagation
928-
ctx = ActionContext()
929-
action_context_var.set(ctx)
969+
# Get existing context or create new one (preserves context from helpers)
970+
ctx = action_context_var.get()
971+
if ctx is None:
972+
ctx = ActionContext()
973+
action_context_var.set(ctx)
930974
ctx.element_info = {"element_id": str(id), "action": "type", "text_length": len(text), "clear_before_type": clear_before_type}
931975

932976
try:
@@ -1071,8 +1115,12 @@ async def _fill_element_text(
10711115
"""
10721116
# Get current active page
10731117
page = self._get_current_page()
1074-
1118+
1119+
# Get existing context or create new one if none exists
10751120
ctx = action_context_var.get()
1121+
if ctx is None:
1122+
ctx = ActionContext()
1123+
action_context_var.set(ctx)
10761124

10771125
# Strategy 1: Try CSS selector if format is valid
10781126
if self._is_valid_css_selector(selector):
@@ -1154,9 +1202,11 @@ async def _fill_element_text(
11541202

11551203
async def clear(self, id) -> bool:
11561204
"""Clears the text in the specified input element."""
1157-
# Initialize action context for error propagation
1158-
ctx = ActionContext()
1159-
action_context_var.set(ctx)
1205+
# Get existing context or create new one (preserves context from helpers)
1206+
ctx = action_context_var.get()
1207+
if ctx is None:
1208+
ctx = ActionContext()
1209+
action_context_var.set(ctx)
11601210
ctx.element_info = {"element_id": str(id), "action": "clear"}
11611211

11621212
try:
@@ -1236,27 +1286,67 @@ async def keyboard_press(self, key) -> bool:
12361286
)
12371287
return False
12381288

1239-
async def b64_page_screenshot(self, full_page=False, file_path=None, file_name=None, save_to_log=True):
1240-
"""Get page screenshot (Base64 encoded)
1289+
async def b64_page_screenshot(
1290+
self,
1291+
full_page: bool = False,
1292+
file_name: Optional[str] = None,
1293+
context: str = 'default'
1294+
) -> Optional[str]:
1295+
"""Get page screenshot (Base64 encoded) and optionally save to local file.
12411296
12421297
Args:
12431298
full_page: whether to capture the whole page
1244-
file_path: screenshot save path (optional)
1245-
file_name: screenshot file name (optional)
1246-
save_to_log: whether to save to log system (default True)
1299+
file_name: descriptive screenshot name (e.g., "marker", "action_click_button")
1300+
context: test context category (e.g., 'test', 'agent', 'scroll', 'error')
12471301
12481302
Returns:
12491303
str: screenshot base64 encoded, or None if screenshot fails
1304+
1305+
Note:
1306+
The screenshot is always returned as base64 for HTML reports and LLM analysis.
1307+
Local file saving is controlled by the _save_screenshots class variable.
12501308
"""
12511309
try:
1252-
# get screenshot from current active page (dynamically resolves to latest page)
1310+
# Get current active page (dynamically resolves to latest page)
12531311
current_page = self._get_current_page()
1254-
screenshot_bytes = await self.take_screenshot(current_page, full_page=full_page, timeout=30000)
1312+
timeout = 90000 if full_page else 60000 # 90s for full page, 60s for viewport
1313+
1314+
# Prepare file path only if saving is enabled
1315+
file_path_str = None
1316+
if self._save_screenshots:
1317+
# Initialize session directory if needed
1318+
session_dir = self.init_screenshot_session()
1319+
1320+
# Generate timestamp and filename
1321+
timestamp = datetime.datetime.now().strftime("%H%M%S")
12551322

1256-
# convert to Base64
1323+
# Build filename: {timestamp}_{context}_{file_name}.png
1324+
if file_name:
1325+
filename = f"{timestamp}_{context}_{file_name}.png"
1326+
else:
1327+
filename = f"{timestamp}_{context}_screenshot.png"
1328+
1329+
file_path_str = str(session_dir / filename)
1330+
1331+
# Capture screenshot (with or without file saving based on config)
1332+
screenshot_bytes = await self.take_screenshot(
1333+
current_page,
1334+
full_page=full_page,
1335+
file_path=file_path_str,
1336+
timeout=timeout
1337+
)
1338+
1339+
# Convert to Base64 for HTML reports
12571340
screenshot_base64 = base64.b64encode(screenshot_bytes).decode('utf-8')
12581341
base64_data = f'data:image/png;base64,{screenshot_base64}'
1342+
1343+
if self._save_screenshots and file_path_str:
1344+
logging.debug(f"Screenshot saved to {file_path_str}")
1345+
else:
1346+
logging.debug("Screenshot captured (not saved to disk)")
1347+
12591348
return base64_data
1349+
12601350
except Exception as e:
12611351
logging.warning(f"Failed to capture screenshot: {e}")
12621352
return None
@@ -1273,32 +1363,46 @@ async def take_screenshot(
12731363
Args:
12741364
page: page object
12751365
full_page: whether to capture the whole page
1276-
file_path: screenshot save path (only used for direct saving, not recommended in test flow)
1277-
timeout: timeout
1366+
file_path: screenshot save path (only used when save_screenshots=True)
1367+
timeout: timeout (milliseconds)
12781368
12791369
Returns:
12801370
bytes: screenshot binary data
1371+
1372+
Note:
1373+
If save_screenshots is False, the screenshot will not be saved to disk
1374+
regardless of the file_path parameter. The method always returns the
1375+
screenshot bytes for in-memory use (e.g., Base64 encoding).
12811376
"""
12821377
try:
1378+
# Shortened and more lenient load state check
1379+
# Note: page.screenshot() already waits for fonts and basic rendering internally
12831380
try:
1284-
await page.wait_for_load_state(timeout=60000)
1381+
await page.wait_for_load_state('domcontentloaded', timeout=10000)
12851382
except Exception as e:
1286-
logging.warning(f'wait_for_load_state before screenshot failed: {e}; attempting screenshot anyway')
1287-
logging.debug('Page is fully loaded or skipped wait; taking screenshot')
1288-
1289-
# Directly capture screenshot as binary data
1290-
if file_path:
1291-
screenshot: bytes = await page.screenshot(
1292-
path=file_path,
1293-
full_page=full_page,
1294-
timeout=timeout,
1295-
)
1296-
else:
1297-
screenshot: bytes = await page.screenshot(
1298-
full_page=full_page,
1299-
timeout=timeout,
1300-
)
1383+
logging.debug(f'Load state check: {e}; proceeding with screenshot')
1384+
1385+
logging.debug(f'Taking screenshot (full_page={full_page}, save={self._save_screenshots}, timeout={timeout}ms)')
1386+
1387+
# Prepare screenshot options with Playwright best practices
1388+
screenshot_options = {
1389+
'full_page': full_page,
1390+
'timeout': timeout,
1391+
'animations': 'disabled', # Skip waiting for CSS animations/transitions (Playwright 1.25+)
1392+
'caret': 'hide', # Hide text input cursor for cleaner screenshots
1393+
}
1394+
1395+
# Only save to disk if _save_screenshots is True and file_path is provided
1396+
if self._save_screenshots and file_path:
1397+
screenshot_options['path'] = file_path
1398+
logging.debug(f'Screenshot will be saved to: {file_path}')
1399+
elif not self._save_screenshots:
1400+
logging.debug('Screenshot saving disabled, returning bytes only')
1401+
1402+
# Capture screenshot with optimized options
1403+
screenshot: bytes = await page.screenshot(**screenshot_options)
13011404

1405+
logging.debug(f'Screenshot captured successfully ({len(screenshot)} bytes)')
13021406
return screenshot
13031407

13041408
except Exception as e:

webqa_agent/actions/click_handler.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ def handle_new_page(page_obj):
123123
new_page_action_handler = ActionHandler()
124124
new_page_action_handler.page = new_page
125125
screenshot_b64 = await new_page_action_handler.b64_page_screenshot(
126-
file_name=f"element_{element_index}_new_page"
126+
file_name=f"element_{element_index}_new_page",
127+
context="test"
127128
)
128129
click_result["new_page_screenshot"] = screenshot_b64
129130
logging.debug("New page screenshot saved")
@@ -135,7 +136,8 @@ def handle_new_page(page_obj):
135136
await page.wait_for_load_state("networkidle", timeout=30000)
136137
else:
137138
screenshot_b64 = await action_handler.b64_page_screenshot(
138-
file_name=f"element_{element_index}_after_click"
139+
file_name=f"element_{element_index}_after_click",
140+
context="test"
139141
)
140142
click_result["screenshot_after"] = screenshot_b64
141143
logging.debug("After click screenshot saved")

0 commit comments

Comments
 (0)