Skip to content

Commit 5f6400d

Browse files
authored
feat: Add enhanced screenshot API (#3012)
Currently (I think) there's only one way to take a screenshot (via events): ``` screenshot_event = browser_session.event_bus.dispatch(ScreenshotEvent(full_page=True)) await screenshot_event screenshot_b64 = await screenshot_event.event_result() ``` This is much easier: ``` screenshot_data = await browser_session.take_screenshot(full_page=True) ``` Also, I need this for demos, please. <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Adds a simple screenshot API to BrowserSession, replacing the event‑bus flow. Supports full-page, region, and element captures, with optional file save and format control. - **New Features** - BrowserSession.take_screenshot(path=None, full_page=False, format='png', quality=None, clip=None) returns bytes; supports png/jpeg/webp, JPEG quality, optional clip, and saving to disk. - BrowserSession.screenshot_element(selector, path=None, format='png', quality=None) captures a CSS-selected element by auto-computing bounds. - CI test verifies viewport and element screenshots return data. <!-- End of auto-generated description by cubic. -->
2 parents ebb017a + a1db06f commit 5f6400d

File tree

2 files changed

+155
-0
lines changed

2 files changed

+155
-0
lines changed

browser_use/browser/session.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2255,3 +2255,125 @@ async def cdp_client_for_node(self, node: EnhancedDOMTreeNode) -> CDPSession:
22552255
self.logger.debug(f'Failed to get CDP client for target {node.target_id}: {e}, using main session')
22562256

22572257
return await self.get_or_create_cdp_session()
2258+
2259+
async def take_screenshot(
2260+
self,
2261+
path: str | None = None,
2262+
full_page: bool = False,
2263+
format: str = 'png',
2264+
quality: int | None = None,
2265+
clip: dict | None = None,
2266+
) -> bytes:
2267+
"""Take a screenshot using CDP.
2268+
2269+
Args:
2270+
path: Optional file path to save screenshot
2271+
full_page: Capture entire scrollable page beyond viewport
2272+
format: Image format ('png', 'jpeg', 'webp')
2273+
quality: Quality 0-100 for JPEG format
2274+
clip: Region to capture {'x': int, 'y': int, 'width': int, 'height': int}
2275+
2276+
Returns:
2277+
Screenshot data as bytes
2278+
"""
2279+
import base64
2280+
2281+
from cdp_use.cdp.page import CaptureScreenshotParameters
2282+
2283+
cdp_session = await self.get_or_create_cdp_session()
2284+
2285+
# Build parameters dict explicitly to satisfy TypedDict expectations
2286+
params: CaptureScreenshotParameters = {
2287+
'format': format,
2288+
'captureBeyondViewport': full_page,
2289+
}
2290+
2291+
if quality is not None and format == 'jpeg':
2292+
params['quality'] = quality
2293+
2294+
if clip:
2295+
params['clip'] = {
2296+
'x': clip['x'],
2297+
'y': clip['y'],
2298+
'width': clip['width'],
2299+
'height': clip['height'],
2300+
'scale': 1,
2301+
}
2302+
2303+
params = CaptureScreenshotParameters(**params)
2304+
2305+
result = await cdp_session.cdp_client.send.Page.captureScreenshot(params=params, session_id=cdp_session.session_id)
2306+
2307+
if not result or 'data' not in result:
2308+
raise Exception('Screenshot failed - no data returned')
2309+
2310+
screenshot_data = base64.b64decode(result['data'])
2311+
2312+
if path:
2313+
Path(path).write_bytes(screenshot_data)
2314+
2315+
return screenshot_data
2316+
2317+
async def screenshot_element(
2318+
self,
2319+
selector: str,
2320+
path: str | None = None,
2321+
format: str = 'png',
2322+
quality: int | None = None,
2323+
) -> bytes:
2324+
"""Take a screenshot of a specific element.
2325+
2326+
Args:
2327+
selector: CSS selector for the element
2328+
path: Optional file path to save screenshot
2329+
format: Image format ('png', 'jpeg', 'webp')
2330+
quality: Quality 0-100 for JPEG format
2331+
2332+
Returns:
2333+
Screenshot data as bytes
2334+
"""
2335+
2336+
bounds = await self._get_element_bounds(selector)
2337+
if not bounds:
2338+
raise ValueError(f"Element '{selector}' not found or has no bounds")
2339+
2340+
return await self.take_screenshot(
2341+
path=path,
2342+
format=format,
2343+
quality=quality,
2344+
clip=bounds,
2345+
)
2346+
2347+
async def _get_element_bounds(self, selector: str) -> dict | None:
2348+
"""Get element bounding box using CDP."""
2349+
2350+
cdp_session = await self.get_or_create_cdp_session()
2351+
2352+
# Get document
2353+
doc = await cdp_session.cdp_client.send.DOM.getDocument(params={'depth': 1}, session_id=cdp_session.session_id)
2354+
2355+
# Query selector
2356+
node_result = await cdp_session.cdp_client.send.DOM.querySelector(
2357+
params={'nodeId': doc['root']['nodeId'], 'selector': selector}, session_id=cdp_session.session_id
2358+
)
2359+
2360+
node_id = node_result.get('nodeId')
2361+
if not node_id:
2362+
return None
2363+
2364+
# Get bounding box
2365+
box_result = await cdp_session.cdp_client.send.DOM.getBoxModel(
2366+
params={'nodeId': node_id}, session_id=cdp_session.session_id
2367+
)
2368+
2369+
box_model = box_result.get('model')
2370+
if not box_model:
2371+
return None
2372+
2373+
content = box_model['content']
2374+
return {
2375+
'x': min(content[0], content[2], content[4], content[6]),
2376+
'y': min(content[1], content[3], content[5], content[7]),
2377+
'width': max(content[0], content[2], content[4], content[6]) - min(content[0], content[2], content[4], content[6]),
2378+
'height': max(content[1], content[3], content[5], content[7]) - min(content[1], content[3], content[5], content[7]),
2379+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import pytest
2+
3+
from browser_use.browser.events import NavigateToUrlEvent
4+
from browser_use.browser.profile import BrowserProfile
5+
from browser_use.browser.session import BrowserSession
6+
7+
8+
@pytest.fixture(scope='function')
async def browser_session():
    """Yield a started headless BrowserSession, killing it once the test is done."""
    headless_profile = BrowserProfile(headless=True)
    active_session = BrowserSession(browser_profile=headless_profile)
    await active_session.start()
    yield active_session
    # Teardown: runs after the test that requested the fixture finishes.
    await active_session.kill()
14+
15+
16+
@pytest.mark.asyncio
async def test_basic_screenshots(browser_session: BrowserSession, httpserver):
    """Navigate to a local page and ensure screenshot helpers return PNG bytes."""

    # Every PNG file starts with this fixed 8-byte signature; both helpers
    # default to format='png', so their output must begin with it.
    png_signature = b'\x89PNG\r\n\x1a\n'

    html = """
    <html><body><h1 id='title'>Hello</h1><p>Screenshot demo.</p></body></html>
    """
    httpserver.expect_request('/demo').respond_with_data(html, content_type='text/html')
    url = httpserver.url_for('/demo')

    nav = browser_session.event_bus.dispatch(NavigateToUrlEvent(url=url, new_tab=False))
    await nav

    data = await browser_session.take_screenshot(full_page=False)
    assert data, 'Viewport screenshot returned no data'
    assert isinstance(data, bytes), 'Viewport screenshot is not bytes'
    assert data.startswith(png_signature), 'Viewport screenshot is not a PNG image'

    element = await browser_session.screenshot_element('h1')
    assert element, 'Element screenshot returned no data'
    assert isinstance(element, bytes), 'Element screenshot is not bytes'
    assert element.startswith(png_signature), 'Element screenshot is not a PNG image'

0 commit comments

Comments
 (0)