Skip to content

Commit 3336b9d

Browse files
committed
feat: Add enhanced screenshot API
1 parent d595221 commit 3336b9d

File tree

2 files changed

+159
-0
lines changed

2 files changed

+159
-0
lines changed

browser_use/browser/session.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2255,3 +2255,129 @@ async def cdp_client_for_node(self, node: EnhancedDOMTreeNode) -> CDPSession:
22552255
self.logger.debug(f'Failed to get CDP client for target {node.target_id}: {e}, using main session')
22562256

22572257
return await self.get_or_create_cdp_session()
2258+
2259+
async def take_screenshot(self, path: str | None = None, full_page: bool = False, format: str = 'png', quality: int | None = None, clip: dict | None = None) -> bytes:
    """Capture a screenshot of the current page via CDP ``Page.captureScreenshot``.

    Args:
        path: Optional file path; when given, the decoded image bytes are also written there.
        full_page: Forwarded to CDP as ``captureBeyondViewport``.
        format: Image format passed to CDP ('png', 'jpeg', 'webp').
        quality: Compression quality 0-100; only sent to CDP when ``format`` is 'jpeg'.
        clip: Optional region to capture {'x': int, 'y': int, 'width': int, 'height': int};
            it is sent with a fixed scale of 1.

    Returns:
        The decoded screenshot as raw bytes.

    Raises:
        RuntimeError: If the CDP response carries no image data.
    """
    import asyncio
    import base64
    from pathlib import Path

    from cdp_use.cdp.page import CaptureScreenshotParameters

    cdp_session = await self.get_or_create_cdp_session()

    # Annotated as the CDP TypedDict so type checkers validate the optional keys added below.
    # NOTE(review): captureBeyondViewport on its own may not extend the capture to the whole
    # scrollable page unless a clip sized from Page.getLayoutMetrics is also sent - confirm.
    params: CaptureScreenshotParameters = {
        'format': format,
        'captureBeyondViewport': full_page,
    }

    if quality is not None and format == 'jpeg':
        params['quality'] = quality

    if clip:
        # Copy only the keys CDP expects so extra caller-supplied keys are not forwarded.
        params['clip'] = {
            'x': clip['x'],
            'y': clip['y'],
            'width': clip['width'],
            'height': clip['height'],
            'scale': 1,
        }

    result = await cdp_session.cdp_client.send.Page.captureScreenshot(
        params=params,
        session_id=cdp_session.session_id,
    )

    if not result or 'data' not in result:
        raise RuntimeError('Screenshot failed - no data returned')

    screenshot_data = base64.b64decode(result['data'])

    if path:
        # File writes block; run them in a worker thread so the event loop stays responsive.
        await asyncio.to_thread(Path(path).write_bytes, screenshot_data)

    return screenshot_data
2312+
2313+
async def screenshot_element(
2314+
self,
2315+
selector: str,
2316+
path: str | None = None,
2317+
format: str = 'png',
2318+
quality: int | None = None,
2319+
) -> bytes:
2320+
"""Take a screenshot of a specific element.
2321+
2322+
Args:
2323+
selector: CSS selector for the element
2324+
path: Optional file path to save screenshot
2325+
format: Image format ('png', 'jpeg', 'webp')
2326+
quality: Quality 0-100 for JPEG format
2327+
2328+
Returns:
2329+
Screenshot data as bytes
2330+
"""
2331+
2332+
bounds = await self._get_element_bounds(selector)
2333+
if not bounds:
2334+
raise ValueError(f"Element '{selector}' not found or has no bounds")
2335+
2336+
return await self.take_screenshot(
2337+
path=path,
2338+
format=format,
2339+
quality=quality,
2340+
clip=bounds,
2341+
)
2342+
2343+
async def _get_element_bounds(self, selector: str) -> dict | None:
    """Resolve ``selector`` to the axis-aligned rectangle enclosing its content quad.

    Returns a dict with 'x', 'y', 'width' and 'height', or None when
    DOM.querySelector yields no node id or DOM.getBoxModel yields no model.
    """
    session = await self.get_or_create_cdp_session()
    dom = session.cdp_client.send.DOM

    # The document root is the starting node for the selector query.
    document = await dom.getDocument(
        params={'depth': 1},
        session_id=session.session_id,
    )
    root_id = document['root']['nodeId']

    match = await dom.querySelector(
        params={'nodeId': root_id, 'selector': selector},
        session_id=session.session_id,
    )
    node_id = match.get('nodeId')
    if not node_id:
        return None

    box = await dom.getBoxModel(
        params={'nodeId': node_id},
        session_id=session.session_id,
    )
    model = box.get('model')
    if not model:
        return None

    # The content quad is read as four (x, y) pairs flattened into eight numbers.
    quad = model['content']
    xs = (quad[0], quad[2], quad[4], quad[6])
    ys = (quad[1], quad[3], quad[5], quad[7])
    left = min(xs)
    top = min(ys)
    return {
        'x': left,
        'y': top,
        'width': max(xs) - left,
        'height': max(ys) - top,
    }
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import pytest
2+
3+
from browser_use.browser.events import NavigateToUrlEvent
4+
from browser_use.browser.profile import BrowserProfile
5+
from browser_use.browser.session import BrowserSession
6+
7+
8+
@pytest.fixture(scope="function")
async def browser_session():
    """Yield a started headless BrowserSession and kill it after the test."""
    profile = BrowserProfile(headless=True)
    started = BrowserSession(browser_profile=profile)
    await started.start()
    yield started
    # Teardown: runs once the test using the fixture has finished.
    await started.kill()
14+
15+
16+
@pytest.mark.asyncio
async def test_basic_screenshots(browser_session: BrowserSession, httpserver):
    """Navigate to a local page and ensure screenshot helpers return PNG bytes."""
    # Every PNG stream starts with this fixed 8-byte signature; 'png' is the default format.
    png_signature = b'\x89PNG\r\n\x1a\n'

    html = """
    <html><body><h1 id='title'>Hello</h1><p>Screenshot demo.</p></body></html>
    """
    httpserver.expect_request("/demo").respond_with_data(html, content_type="text/html")
    url = httpserver.url_for("/demo")

    nav = browser_session.event_bus.dispatch(NavigateToUrlEvent(url=url, new_tab=False))
    await nav

    data = await browser_session.take_screenshot(full_page=False)
    assert isinstance(data, bytes), "Viewport screenshot did not return bytes"
    assert data.startswith(png_signature), "Viewport screenshot returned no PNG data"

    element = await browser_session.screenshot_element("h1")
    assert isinstance(element, bytes), "Element screenshot did not return bytes"
    assert element.startswith(png_signature), "Element screenshot returned no PNG data"

0 commit comments

Comments
 (0)