Skip to content

Commit 960e607

Browse files
authored
Merge branch 'main' into add-ad-use-demo
2 parents 59d9c50 + 5c99dab commit 960e607

17 files changed

+403
-20
lines changed

browser_use/agent/service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1312,7 +1312,7 @@ async def take_step(self, step_info: AgentStepInfo | None = None) -> tuple[bool,
13121312
Returns:
13131313
Tuple[bool, bool]: (is_done, is_valid)
13141314
"""
1315-
if len(self.history.history) == 0:
1315+
if step_info is not None and step_info.step_number == 0:
13161316
# First step
13171317
self._log_first_step_startup()
13181318
await self._execute_initial_actions()

browser_use/browser/session.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2255,3 +2255,125 @@ async def cdp_client_for_node(self, node: EnhancedDOMTreeNode) -> CDPSession:
22552255
self.logger.debug(f'Failed to get CDP client for target {node.target_id}: {e}, using main session')
22562256

22572257
return await self.get_or_create_cdp_session()
2258+
2259+
async def take_screenshot(
2260+
self,
2261+
path: str | None = None,
2262+
full_page: bool = False,
2263+
format: str = 'png',
2264+
quality: int | None = None,
2265+
clip: dict | None = None,
2266+
) -> bytes:
2267+
"""Take a screenshot using CDP.
2268+
2269+
Args:
2270+
path: Optional file path to save screenshot
2271+
full_page: Capture entire scrollable page beyond viewport
2272+
format: Image format ('png', 'jpeg', 'webp')
2273+
quality: Quality 0-100 for JPEG format
2274+
clip: Region to capture {'x': int, 'y': int, 'width': int, 'height': int}
2275+
2276+
Returns:
2277+
Screenshot data as bytes
2278+
"""
2279+
import base64
2280+
2281+
from cdp_use.cdp.page import CaptureScreenshotParameters
2282+
2283+
cdp_session = await self.get_or_create_cdp_session()
2284+
2285+
# Build parameters dict explicitly to satisfy TypedDict expectations
2286+
params: CaptureScreenshotParameters = {
2287+
'format': format,
2288+
'captureBeyondViewport': full_page,
2289+
}
2290+
2291+
if quality is not None and format == 'jpeg':
2292+
params['quality'] = quality
2293+
2294+
if clip:
2295+
params['clip'] = {
2296+
'x': clip['x'],
2297+
'y': clip['y'],
2298+
'width': clip['width'],
2299+
'height': clip['height'],
2300+
'scale': 1,
2301+
}
2302+
2303+
params = CaptureScreenshotParameters(**params)
2304+
2305+
result = await cdp_session.cdp_client.send.Page.captureScreenshot(params=params, session_id=cdp_session.session_id)
2306+
2307+
if not result or 'data' not in result:
2308+
raise Exception('Screenshot failed - no data returned')
2309+
2310+
screenshot_data = base64.b64decode(result['data'])
2311+
2312+
if path:
2313+
Path(path).write_bytes(screenshot_data)
2314+
2315+
return screenshot_data
2316+
2317+
async def screenshot_element(
2318+
self,
2319+
selector: str,
2320+
path: str | None = None,
2321+
format: str = 'png',
2322+
quality: int | None = None,
2323+
) -> bytes:
2324+
"""Take a screenshot of a specific element.
2325+
2326+
Args:
2327+
selector: CSS selector for the element
2328+
path: Optional file path to save screenshot
2329+
format: Image format ('png', 'jpeg', 'webp')
2330+
quality: Quality 0-100 for JPEG format
2331+
2332+
Returns:
2333+
Screenshot data as bytes
2334+
"""
2335+
2336+
bounds = await self._get_element_bounds(selector)
2337+
if not bounds:
2338+
raise ValueError(f"Element '{selector}' not found or has no bounds")
2339+
2340+
return await self.take_screenshot(
2341+
path=path,
2342+
format=format,
2343+
quality=quality,
2344+
clip=bounds,
2345+
)
2346+
2347+
async def _get_element_bounds(self, selector: str) -> dict | None:
2348+
"""Get element bounding box using CDP."""
2349+
2350+
cdp_session = await self.get_or_create_cdp_session()
2351+
2352+
# Get document
2353+
doc = await cdp_session.cdp_client.send.DOM.getDocument(params={'depth': 1}, session_id=cdp_session.session_id)
2354+
2355+
# Query selector
2356+
node_result = await cdp_session.cdp_client.send.DOM.querySelector(
2357+
params={'nodeId': doc['root']['nodeId'], 'selector': selector}, session_id=cdp_session.session_id
2358+
)
2359+
2360+
node_id = node_result.get('nodeId')
2361+
if not node_id:
2362+
return None
2363+
2364+
# Get bounding box
2365+
box_result = await cdp_session.cdp_client.send.DOM.getBoxModel(
2366+
params={'nodeId': node_id}, session_id=cdp_session.session_id
2367+
)
2368+
2369+
box_model = box_result.get('model')
2370+
if not box_model:
2371+
return None
2372+
2373+
content = box_model['content']
2374+
return {
2375+
'x': min(content[0], content[2], content[4], content[6]),
2376+
'y': min(content[1], content[3], content[5], content[7]),
2377+
'width': max(content[0], content[2], content[4], content[6]) - min(content[0], content[2], content[4], content[6]),
2378+
'height': max(content[1], content[3], content[5], content[7]) - min(content[1], content[3], content[5], content[7]),
2379+
}

browser_use/browser/watchdogs/downloads_watchdog.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from browser_use.browser.events import (
1818
BrowserLaunchEvent,
19+
BrowserStateRequestEvent,
1920
BrowserStoppedEvent,
2021
FileDownloadedEvent,
2122
NavigationCompleteEvent,
@@ -34,6 +35,7 @@ class DownloadsWatchdog(BaseWatchdog):
3435
# Events this watchdog listens to (for documentation)
3536
LISTENS_TO: ClassVar[list[type[BaseEvent[Any]]]] = [
3637
BrowserLaunchEvent,
38+
BrowserStateRequestEvent,
3739
BrowserStoppedEvent,
3840
TabCreatedEvent,
3941
TabClosedEvent,
@@ -81,6 +83,26 @@ async def on_TabClosedEvent(self, event: TabClosedEvent) -> None:
8183
"""Stop monitoring closed tabs."""
8284
pass # No cleanup needed, browser context handles target lifecycle
8385

86+
async def on_BrowserStateRequestEvent(self, event: BrowserStateRequestEvent) -> None:
    """Dispatch a NavigationCompleteEvent for the focused page on a state request.

    Returns without dispatching when the browser session has no agent focus
    or when no current page URL is available. Otherwise a
    ``NavigationCompleteEvent`` carrying the current URL and the focused
    target id is dispatched on the event bus, with the incoming event
    recorded as its parent.
    """
    focused = self.browser_session.agent_focus
    if not focused:
        return

    current_url = await self.browser_session.get_current_page_url()
    if not current_url:
        return

    navigation_event = NavigationCompleteEvent(
        event_type='NavigationCompleteEvent',
        url=current_url,
        target_id=focused.target_id,
        event_parent_id=event.event_id,
    )
    self.event_bus.dispatch(navigation_event)
105+
84106
async def on_BrowserStoppedEvent(self, event: BrowserStoppedEvent) -> None:
85107
"""Clean up when browser stops."""
86108
# Cancel all CDP event handler tasks

browser_use/dom/serializer/serializer.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,6 @@ def _is_interactive_cached(self, node: EnhancedDOMTreeNode) -> bool:
127127
def _create_simplified_tree(self, node: EnhancedDOMTreeNode, depth: int = 0) -> SimplifiedNode | None:
128128
"""Step 1: Create a simplified tree with enhanced element detection."""
129129

130-
# Prevent infinite recursion by limiting depth to 30 levels
131-
if depth > 30:
132-
return None
133-
134130
if node.node_type == NodeType.DOCUMENT_NODE:
135131
# for all children including shadow roots
136132
for child in node.children_and_shadow_roots:
File renamed without changes.

docs/docs.json

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,42 @@
7474
{
7575
"source": "/customize/examples/chain-agents",
7676
"destination": "/customize/examples/follow-up-tasks"
77+
},
78+
{
79+
"source": "/customize/examples/fast-agent",
80+
"destination": "/examples/templates/fast-agent"
81+
},
82+
{
83+
"source": "/customize/examples/follow-up-tasks",
84+
"destination": "/examples/templates/follow-up-tasks"
85+
},
86+
{
87+
"source": "/customize/examples/parallel-browser",
88+
"destination": "/examples/templates/parallel-browser"
89+
},
90+
{
91+
"source": "/customize/examples/playwright-integration",
92+
"destination": "/examples/templates/playwright-integration"
93+
},
94+
{
95+
"source": "/customize/examples/sensitive-data",
96+
"destination": "/examples/templates/sensitive-data"
97+
},
98+
{
99+
"source": "/customize/examples/secure",
100+
"destination": "/examples/templates/secure"
101+
},
102+
{
103+
"source": "/customize/examples/more-examples",
104+
"destination": "/examples/templates/more-examples"
105+
},
106+
{
107+
"source": "/customize/examples/ad-use",
108+
"destination": "/examples/apps/ad-use"
109+
},
110+
{
111+
"source": "/customize/examples/vibetest-use",
112+
"destination": "/examples/apps/vibetest-use"
77113
}
78114
],
79115
"navigation": {
@@ -99,6 +135,7 @@
99135
"pages": [
100136
"customize/agent/basics",
101137
"customize/agent/supported-models",
138+
"customize/agent/prompting-guide",
102139
"customize/agent/output-format",
103140
"customize/agent/all-parameters"
104141
]
@@ -126,20 +163,6 @@
126163
"customize/tools/response"
127164
]
128165
},
129-
{
130-
"group": "Examples",
131-
"icon": "folder-open",
132-
"pages": [
133-
"customize/examples/fast-agent",
134-
"customize/examples/follow-up-tasks",
135-
"customize/examples/parallel-browser",
136-
"customize/examples/playwright-integration",
137-
"customize/examples/sensitive-data",
138-
"customize/examples/secure",
139-
"customize/examples/more-examples",
140-
"customize/examples/prompting-guide"
141-
]
142-
},
143166
{
144167
"group": "Integration",
145168
"icon": "plug",
@@ -150,6 +173,32 @@
150173
}
151174
]
152175
},
176+
{
177+
"group": "Examples",
178+
"pages": [
179+
{
180+
"group": "Templates",
181+
"icon": "folder",
182+
"pages": [
183+
"examples/templates/fast-agent",
184+
"examples/templates/follow-up-tasks",
185+
"examples/templates/parallel-browser",
186+
"examples/templates/playwright-integration",
187+
"examples/templates/sensitive-data",
188+
"examples/templates/secure",
189+
"examples/templates/more-examples"
190+
]
191+
},
192+
{
193+
"group": "Apps",
194+
"icon": "box-open",
195+
"pages": [
196+
"examples/apps/ad-use",
197+
"examples/apps/vibetest-use"
198+
]
199+
}
200+
]
201+
},
153202
{
154203
"group": "Development",
155204
"pages": [

docs/examples/apps/ad-use.mdx

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
---
2+
title: "Ad-Use (Ad Generator)"
3+
description: "Generate Instagram ads from landing pages using browser agents and Google's Nano Banana 🍌."
4+
icon: "image"
5+
mode: "wide"
6+
---
7+
8+
<Note>
9+
This demo requires browser-use v0.7.6+.
10+
</Note>
11+
12+
<video
13+
controls
14+
className="w-full aspect-video rounded-xl"
15+
src="https://github.com/user-attachments/assets/7fab54a9-b36b-4fba-ab98-a438f2b86b7e">
16+
</video>
17+
18+
## Features
19+
20+
1. Agent visits your target website
21+
2. Captures brand name, tagline, and key selling points
22+
3. Takes a clean screenshot for design reference
23+
4. Creates a scroll-stopping Instagram ad with 🍌
24+
25+
## Setup
26+
27+
Make sure the newest version of browser-use is installed (with screenshot functionality):
28+
```bash
29+
pip install -U browser-use
30+
```
31+
32+
Export your Gemini API key (you can get one from [Google AI Studio](https://makersuite.google.com/app/apikey)):
33+
```bash
34+
export GOOGLE_API_KEY='your-google-api-key-here'
35+
```
36+
37+
## Normal Usage
38+
39+
```bash
40+
# Basic - Generate ad from any website
41+
python ad_generator.py https://www.apple.com/iphone-16-pro/
42+
43+
# Debug Mode - See the browser in action
44+
python ad_generator.py https://www.apple.com/iphone-16-pro/ --debug
45+
```
46+
47+
## Programmatic Usage
48+
```python
49+
import asyncio
50+
from ad_generator import create_ad_from_landing_page
51+
52+
async def main():
53+
results = await create_ad_from_landing_page(
54+
url="https://your-landing-page.com",
55+
debug=False
56+
)
57+
print(f"Generated ads: {results}")
58+
59+
asyncio.run(main())
60+
```
61+
62+
## Output
63+
64+
Generated ads are saved in the `output/` directory with:
65+
- **PNG image files** (ad_style_timestamp.png) - Actual generated ads from Gemini 2.5 Flash Image
66+
- **Prompt files** (ad_style_timestamp_prompt.txt) - The prompts used for generation
67+
- **Landing page screenshots** for reference
68+
69+
## Source Code
70+
71+
Full implementation: [https://github.com/browser-use/browser-use/tree/main/examples/apps/ad-use](https://github.com/browser-use/browser-use/tree/main/examples/apps/ad-use)

0 commit comments

Comments
 (0)