From cee79a812992f6008178c4af6637678a7a6d086f Mon Sep 17 00:00:00 2001 From: TheRedRad Date: Tue, 6 Jan 2026 21:12:17 +0100 Subject: [PATCH] feat: add force viewport screenshot --- crawl4ai/async_configs.py | 5 +++++ crawl4ai/async_crawler_strategy.py | 11 ++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/crawl4ai/async_configs.py b/crawl4ai/async_configs.py index 10cc48d08..3ea103e44 100644 --- a/crawl4ai/async_configs.py +++ b/crawl4ai/async_configs.py @@ -1081,6 +1081,9 @@ class CrawlerRunConfig(): Default: None. screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy. Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000). + force_viewport_screenshot (bool): If True, always take viewport-only screenshots regardless of page height. + When False, uses automatic decision (viewport for short pages, full-page for long pages). + Default: False. pdf (bool): Whether to generate a PDF of the page. Default: False. image_description_min_word_threshold (int): Minimum words for image description extraction. @@ -1220,6 +1223,7 @@ def __init__( screenshot: bool = False, screenshot_wait_for: float = None, screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD, + force_viewport_screenshot: bool = False, pdf: bool = False, capture_mhtml: bool = False, image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD, @@ -1336,6 +1340,7 @@ def __init__( self.screenshot = screenshot self.screenshot_wait_for = screenshot_wait_for self.screenshot_height_threshold = screenshot_height_threshold + self.force_viewport_screenshot = force_viewport_screenshot self.pdf = pdf self.capture_mhtml = capture_mhtml self.image_description_min_word_threshold = image_description_min_word_threshold diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 2850b36a6..68c2db808 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -998,7 +998,9 @@ async def handle_request_failed_capture(request): if config.screenshot_wait_for: await asyncio.sleep(config.screenshot_wait_for) screenshot_data = await self.take_screenshot( - page, screenshot_height_threshold=config.screenshot_height_threshold + page, + screenshot_height_threshold=config.screenshot_height_threshold, + force_viewport_screenshot=config.force_viewport_screenshot ) if screenshot_data or pdf_data or mhtml_data: @@ -1536,6 +1538,13 @@ async def take_screenshot(self, page, **kwargs) -> str: Returns: str: The base64-encoded screenshot data """ + # Check if viewport-only screenshot is forced + force_viewport = kwargs.get('force_viewport_screenshot', False) + + if force_viewport: + # Use viewport-only screenshot + return await self.take_screenshot_naive(page) + need_scroll = await self.page_need_scroll(page) if not need_scroll: