Skip to content

Commit bb15a75

Browse files
authored
Add ad use demo: Instagram ad generator (#3049)
New ad-use demo showcasing browser-use capabilities for creative automation (using Google's Nano Banana). Files are added to the new examples folder examples/apps/. <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Adds an example app that generates Instagram ads from any landing page using browser-use agents and Gemini image generation. This showcases creative automation and lives under examples/apps/ad-use. - New Features - Example app and README in examples/apps/ad-use with CLI and programmatic usage. - Agent visits the URL, extracts brand name/tagline/CTA/offers, takes a screenshot, and generates a 1:1 ad image. - Saves ad PNG, prompt, analysis, and landing page screenshot to output/. - Debug mode to watch the browser; simple API: create_ad_from_landing_page(url, debug=False). - Requires browser-use v0.7.4+ and GOOGLE_API_KEY. <!-- End of auto-generated description by cubic. -->
2 parents 5c99dab + 960e607 commit bb15a75

File tree

4 files changed

+293
-0
lines changed

4 files changed

+293
-0
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Ad-Use
2+
3+
Automatically generate Instagram ads from any landing page using browser agents and Google's Nano Banana 🍌 image generation model.
4+
5+
[!CAUTION]
6+
This demo requires browser-use v0.7.4+.
7+
8+
https://github.com/user-attachments/assets/7fab54a9-b36b-4fba-ab98-a438f2b86b7e
9+
10+
## Features
11+
12+
1. Agent visits your target website
13+
2. Captures brand name, tagline, and key selling points
14+
3. Takes a clean screenshot for design reference
15+
4. Creates a scroll-stopping Instagram ad with 🍌
16+
17+
## Setup
18+
19+
Make sure the newest version of browser-use is installed (with screenshot functionality):
20+
```bash
21+
pip install -U browser-use
22+
```
23+
24+
Export your Gemini API key (get one from [Google AI Studio](https://makersuite.google.com/app/apikey)):
25+
```
26+
export GOOGLE_API_KEY='your-google-api-key-here'
27+
```
28+
29+
## Normal Usage
30+
31+
```bash
32+
# Basic - Generate ad from any website
33+
python ad_generator.py https://www.apple.com/iphone-16-pro/
34+
35+
# Debug Mode - See the browser in action
36+
python ad_generator.py https://www.apple.com/iphone-16-pro/ --debug
37+
```
38+
39+
## Programmatic Usage
40+
```python
41+
import asyncio
42+
from ad_generator import create_ad_from_landing_page
43+
44+
async def main():
45+
results = await create_ad_from_landing_page(
46+
url="https://your-landing-page.com",
47+
debug=False
48+
)
49+
print(f"Generated ads: {results}")
50+
51+
asyncio.run(main())
52+
```
53+
54+
## Output
55+
56+
Generated ads are saved in the `output/` directory with:
57+
- **PNG image files** (`ad_<timestamp>.png`) - Actual generated ads from Gemini 2.5 Flash Image
58+
- **Analysis files** (`analysis_<timestamp>.txt`) - The source URL, the extracted brand analysis, and the prompt used for generation
59+
- **Landing page screenshots** (`landing_page_<timestamp>.png`) for reference
60+
61+
## Source Code
62+
63+
Full implementation: [https://github.com/browser-use/browser-use/tree/main/examples/apps/ad-use](https://github.com/browser-use/browser-use/tree/main/examples/apps/ad-use)
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
import argparse
2+
import asyncio
3+
import logging
4+
import os
5+
import subprocess
6+
import sys
7+
from datetime import datetime
8+
from pathlib import Path
9+
10+
11+
def setup_environment(debug: bool):
    """Set the browser-use logging environment variables for this run.

    With ``debug`` enabled, library logging setup is switched on at ``info``
    level. Otherwise it is switched off, the level is set to ``critical`` and
    the root logger is raised to ``CRITICAL`` as well.

    NOTE(review): the script calls this before importing ``browser_use``,
    presumably because the library reads these variables at import time.
    """
    if debug:
        os.environ.update(
            BROWSER_USE_SETUP_LOGGING='true',
            BROWSER_USE_LOGGING_LEVEL='info',
        )
        return

    os.environ.update(
        BROWSER_USE_SETUP_LOGGING='false',
        BROWSER_USE_LOGGING_LEVEL='critical',
    )
    # Quiet everything routed through the root logger too.
    logging.getLogger().setLevel(logging.CRITICAL)
19+
20+
21+
# Command-line interface. The arguments are resolved here, ahead of the
# browser_use import further down, so that setup_environment() can set the
# logging variables first.
parser = argparse.ArgumentParser(description='Generate ads from landing pages using browser-use + 🍌')
parser.add_argument('url', nargs='?', help='Landing page URL to analyze')
parser.add_argument('--debug', action='store_true', default=False, help='Enable debug mode (show browser, verbose logs)')
# Only consume the real command line when executed as a script. When this
# module is imported (programmatic usage), sys.argv belongs to the host
# program: parsing it could pick up unrelated flags or exit on unknown ones,
# so fall back to the defaults (no URL, debug off) instead.
args = parser.parse_args() if __name__ == '__main__' else parser.parse_args([])
setup_environment(args.debug)
26+
27+
import aiofiles
28+
from google import genai
29+
from PIL import Image
30+
31+
from browser_use import Agent, BrowserSession
32+
from browser_use.llm.google import ChatGoogle
33+
34+
# Gemini API key, read once from the environment at import time; None when
# the GOOGLE_API_KEY variable is not set.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
35+
36+
37+
class LandingPageAnalyzer:
    """Run a browser-use agent over a landing page to collect brand info and a screenshot."""

    def __init__(self, debug: bool = False):
        """Create the analyzer.

        Args:
            debug: When true the browser is started with a visible window
                (``headless=False``).
        """
        self.debug = debug
        self.llm = ChatGoogle(model='gemini-2.0-flash-exp', api_key=GOOGLE_API_KEY)
        # Screenshots are written below ./output (relative to the working directory).
        self.output_dir = Path('output')
        self.output_dir.mkdir(exist_ok=True)

    async def analyze_landing_page(self, url: str) -> dict:
        """Visit ``url`` with an agent and extract the key brand information.

        While the agent runs, a background task waits a few seconds and then
        captures a viewport screenshot of the agent's browser session.

        Args:
            url: Landing page to analyze.

        Returns:
            A dict with the keys ``url``, ``analysis`` (the agent's final
            result, or a placeholder string when it produced none),
            ``screenshot_path`` (a ``Path``, or ``None`` when no screenshot
            was captured) and ``timestamp`` (``YYYYmmdd_HHMMSS``).

        Raises:
            Whatever ``agent.run()`` raises; the pending screenshot task is
            cancelled first.
        """
        browser_session = BrowserSession(
            headless=not self.debug,  # headless=False only when debug=True
            disable_security=True,
        )

        agent = Agent(
            task=f"""Go to {url} and quickly extract key brand information for Instagram ad creation.

Steps:
1. Navigate to the website
2. From the initial view, extract ONLY these essentials:
- Brand/Product name
- Main tagline or value proposition (one sentence)
- Primary call-to-action text
- Any visible pricing or special offer
3. Scroll down half a page, twice (0.5 pages each) to check for any key info
4. Done - keep it simple and focused on the brand

Return ONLY the key brand info, not page structure details.""",
            llm=self.llm,
            browser_session=browser_session,
            max_actions_per_step=2,
            step_timeout=30,
            use_thinking=False,
            vision_detail_level='high',
        )

        screenshot_path = None
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        async def screenshot_callback(agent_instance):
            nonlocal screenshot_path

            # Give the agent a head start so the page is (presumably) loaded
            # by the time the screenshot is taken.
            await asyncio.sleep(4)
            target = self.output_dir / f'landing_page_{timestamp}.png'
            await agent_instance.browser_session.take_screenshot(path=str(target), full_page=False)
            # Publish the path only after the capture succeeded, so callers
            # never see a path to a file that was not written.
            screenshot_path = target

        screenshot_task = asyncio.create_task(screenshot_callback(agent))

        try:
            history = await agent.run()
        except BaseException:
            # Do not leave the delayed capture running after a failed or
            # cancelled agent run.
            screenshot_task.cancel()
            raise

        try:
            await screenshot_task
        except Exception as e:
            # The screenshot is optional: report the problem and carry on.
            print(f'Screenshot task failed: {e}')

        analysis = history.final_result()
        if not analysis:
            analysis = 'No analysis content extracted'

        return {'url': url, 'analysis': analysis, 'screenshot_path': screenshot_path, 'timestamp': timestamp}
100+
101+
102+
class AdGenerator:
    """Build an ad prompt, generate the ad image with Gemini and save the results."""

    def __init__(self, api_key: str | None = GOOGLE_API_KEY):
        """Create the Gemini client and make sure the output directory exists.

        Args:
            api_key: Google API key; defaults to the module-level ``GOOGLE_API_KEY``.

        Raises:
            ValueError: If no API key is available.
        """
        if not api_key:
            raise ValueError('GOOGLE_API_KEY is missing or empty – set the environment variable or pass api_key explicitly')

        self.client = genai.Client(api_key=api_key)
        self.output_dir = Path('output')
        self.output_dir.mkdir(exist_ok=True)

    def create_ad_prompt(self, browser_analysis: str) -> str:
        """Return the image-generation prompt built around ``browser_analysis``."""
        prompt = f"""Create an Instagram ad for this brand:

{browser_analysis}

Create a vibrant, eye-catching Instagram ad image with:
- Try to use the colors and style of the logo or brand, else:
- Bold, modern gradient background with bright colors
- Large, playful sans-serif text with the product/service name from the analysis
- Trendy design elements: geometric shapes, sparkles, emojis
- Fun bubbles or badges for any pricing or special offers mentioned
- Call-to-action button with text from the analysis
- Emphasizes the key value proposition from the analysis
- Uses visual elements that match the brand personality
- Square format (1:1 ratio)
- Use color psychology to drive action

Style: Modern Instagram advertisement, (1:1), scroll-stopping, professional but playful, conversion-focused"""
        return prompt

    async def generate_ad_image(self, prompt: str, screenshot_path: Path | None = None) -> bytes:
        """Generate ad image bytes using Gemini. Returns *empty bytes* on failure.

        Args:
            prompt: Text prompt describing the ad.
            screenshot_path: Optional landing page screenshot. When the file
                exists, a centered square crop of it is sent along with the
                prompt as a visual reference.

        Returns:
            The inline data of the first response part that carries any, or
            ``b''`` when the request fails or the response has no such part.
        """
        try:
            from typing import Any

            contents: list[Any] = [prompt]

            if screenshot_path and screenshot_path.exists():
                screenshot_prompt = (
                    '\n\nHere is the actual landing page screenshot to reference for design inspiration, '
                    'colors, layout, and visual style:'
                )

                # Close the source file deterministically; crop() returns an
                # independent, already-loaded image.
                with Image.open(screenshot_path) as source:
                    w, h = source.size
                    side = min(w, h)
                    # Centered square crop to match the requested 1:1 ad format.
                    img = source.crop(((w - side) // 2, (h - side) // 2, (w + side) // 2, (h + side) // 2))

                contents = [prompt + screenshot_prompt, img]

            # generate_content is a blocking call; run it in a worker thread
            # so the event loop is not stalled for the duration of the request.
            response = await asyncio.to_thread(
                self.client.models.generate_content,
                model='gemini-2.5-flash-image-preview',
                contents=contents,
            )

            cand = getattr(response, 'candidates', None)
            if cand:
                # ``parts`` may be missing or None; treat both as "no parts".
                parts = getattr(cand[0].content, 'parts', None) or []
                for part in parts:
                    inline = getattr(part, 'inline_data', None)
                    if inline and inline.data:
                        return inline.data

        except Exception as e:
            # Best effort by contract: report and fall through to empty bytes.
            print(f'❌ Image generation failed: {e}')

        return b''

    async def save_results(self, ad_image: bytes, prompt: str, analysis: str, url: str, timestamp: str) -> str:
        """Write the ad image and a text report to the output directory.

        Creates ``ad_{timestamp}.png`` with the raw image bytes and
        ``analysis_{timestamp}.txt`` with the URL, the analysis and the prompt.

        Returns:
            The image path as a string.
        """
        image_path = self.output_dir / f'ad_{timestamp}.png'
        async with aiofiles.open(image_path, 'wb') as f:
            await f.write(ad_image)

        analysis_path = self.output_dir / f'analysis_{timestamp}.txt'
        async with aiofiles.open(analysis_path, 'w', encoding='utf-8') as f:
            await f.write(f'URL: {url}\n\n')
            await f.write('BROWSER-USE ANALYSIS:\n')
            await f.write(analysis)
            await f.write('\n\nGENERATED PROMPT:\n')
            await f.write(prompt)

        return str(image_path)
183+
184+
185+
def open_image(image_path: str):
    """Show ``image_path`` in the platform's default viewer (best effort).

    Any failure is printed rather than raised.
    """
    platform = sys.platform
    try:
        if platform.startswith('win'):
            # Windows
            os.startfile(image_path)
            return

        # macOS ships `open`; everything else is treated as Linux (`xdg-open`).
        launcher = 'open' if platform.startswith('darwin') else 'xdg-open'
        subprocess.run([launcher, image_path], check=True)
    except Exception as e:
        print(f'❌ Could not open image: {e}')
199+
200+
201+
async def create_ad_from_landing_page(url: str, debug: bool = False):
    """Analyze ``url``, generate an Instagram ad for it and save the results.

    Args:
        url: Landing page to turn into an ad.
        debug: Passed to ``LandingPageAnalyzer``.

    Returns:
        Path (as a string) of the saved ad image.

    Raises:
        ValueError: From ``AdGenerator`` when no Google API key is configured.
        RuntimeError: When image generation produced no data.
        Exception: Any other pipeline error is printed and re-raised.
    """
    analyzer = LandingPageAnalyzer(debug=debug)
    generator = AdGenerator()

    try:
        print(f'🚀 Analyzing {url}...')
        page_data = await analyzer.analyze_landing_page(url)

        prompt = generator.create_ad_prompt(page_data['analysis'])
        ad_image = await generator.generate_ad_image(prompt, page_data.get('screenshot_path'))
        if not ad_image:
            # generate_ad_image signals failure with empty bytes; saving them
            # would produce a 0-byte PNG that is then reported as a success.
            raise RuntimeError('Image generation returned no data; no ad was created')

        result_path = await generator.save_results(ad_image, prompt, page_data['analysis'], url, page_data['timestamp'])

        print(f'🎨 Generated ad: {result_path}')
        if page_data.get('screenshot_path'):
            print(f'📸 Page screenshot: {page_data["screenshot_path"]}')
        open_image(result_path)

        return result_path

    except Exception as e:
        print(f'❌ Error: {e}')
        raise
223+
224+
225+
if __name__ == '__main__':
    # Use the URL from the command line; otherwise ask for one interactively
    # and fall back to a sample page when the answer is empty.
    url = args.url
    if not url:
        url = input('🔗 Enter URL: ').strip() or 'https://www.apple.com/iphone-17-pro/'

    asyncio.run(create_ad_from_landing_page(url, debug=args.debug))
1.36 MB
Loading
1.45 MB
Loading

0 commit comments

Comments
(0)