Skip to content

Commit 3de7e34

Browse files
authored
Merge pull request #83 from vvincent1234/fix/own_browser_on_mac
Fix using own browser on mac
2 parents b00ebf5 + a234f0c commit 3de7e34

File tree

5 files changed

+458
-122
lines changed

5 files changed

+458
-122
lines changed

src/agent/custom_agent.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
import pdb
1010
import traceback
1111
from typing import Optional, Type
12+
from PIL import Image, ImageDraw, ImageFont
13+
import os
14+
import base64
15+
import io
1216

1317
from browser_use.agent.prompts import SystemPrompt
1418
from browser_use.agent.service import Agent
@@ -227,6 +231,119 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
227231
)
228232
if state:
229233
self._make_history_item(model_output, state, result)
234+
def create_history_gif(
    self,
    output_path: str = 'agent_history.gif',
    duration: int = 3000,
    show_goals: bool = True,
    show_task: bool = True,
    show_logo: bool = False,
    font_size: int = 40,
    title_font_size: int = 56,
    goal_font_size: int = 44,
    margin: int = 40,
    line_spacing: float = 1.5,
) -> None:
    """Create a GIF from the agent's history with overlaid task and goal text.

    One frame is produced per history item that has a screenshot, optionally
    preceded by a task title frame. Nothing is written when the history is
    empty or its first item has no screenshot.

    Args:
        output_path: File path the GIF is saved to.
        duration: Passed to Pillow's ``save`` as the per-frame display time
            (milliseconds for GIF output).
        show_goals: When true, overlay each step's ``current_state.thought``
            on its screenshot via ``_add_overlay_to_image``.
        show_task: When true (and ``self.task`` is set), prepend a frame
            built by ``_create_task_frame``.
        show_logo: When true, try to load ``./static/browser-use.png``
            (relative to the working directory) and pass it to the frame
            helpers; a load failure is logged and otherwise ignored.
        font_size: Point size of the regular font.
        title_font_size: Point size of the title font.
        goal_font_size: Not used by this method; kept so existing callers
            that pass it keep working.
        margin: Forwarded to ``_add_overlay_to_image``.
        line_spacing: Forwarded to ``_create_task_frame``.
    """
    import platform

    history_items = self.history.history
    if not history_items:
        logger.warning('No history to create GIF from')
        return

    # The task frame is rendered from the first screenshot, so without it
    # no GIF is produced at all.
    if not history_items[0].state.screenshot:
        logger.warning('No history or first screenshot to create GIF from')
        return

    # Try the preferred fonts in order; fall back to Pillow's built-in
    # bitmap font when none of them can be loaded.
    on_windows = platform.system() == "Windows"
    for font_name in ('Helvetica', 'Arial', 'DejaVuSans', 'Verdana'):
        font_ref = font_name
        if on_windows:
            # Need to specify the abs font path on Windows
            font_ref = os.path.join(
                os.getenv("WIN_FONT_DIR", "C:\\Windows\\Fonts"), font_name + ".ttf"
            )
        try:
            regular_font = ImageFont.truetype(font_ref, font_size)
            title_font = ImageFont.truetype(font_ref, title_font_size)
        except OSError:
            continue
        break
    else:
        regular_font = ImageFont.load_default()
        title_font = ImageFont.load_default()

    # Load logo if requested
    logo = None
    if show_logo:
        try:
            logo = Image.open('./static/browser-use.png')
            # Scale the logo to a fixed 150px height, preserving aspect ratio
            logo_height = 150
            aspect_ratio = logo.width / logo.height
            logo_width = int(logo_height * aspect_ratio)
            logo = logo.resize((logo_width, logo_height), Image.Resampling.LANCZOS)
        except Exception as e:
            # Best effort: the GIF is still produced, just without a logo.
            logger.warning(f'Could not load logo: {e}')

    images = []

    # Create task frame if requested
    if show_task and self.task:
        task_frame = self._create_task_frame(
            self.task,
            history_items[0].state.screenshot,
            title_font,
            regular_font,
            logo,
            line_spacing,
        )
        images.append(task_frame)

    # Process each history item; step numbers are 1-based and count every
    # history item, including those skipped for lacking a screenshot.
    for i, item in enumerate(history_items, 1):
        if not item.state.screenshot:
            continue

        # Convert base64 screenshot to PIL Image
        img_data = base64.b64decode(item.state.screenshot)
        image = Image.open(io.BytesIO(img_data))

        if show_goals and item.model_output:
            image = self._add_overlay_to_image(
                image=image,
                step_number=i,
                goal_text=item.model_output.current_state.thought,
                regular_font=regular_font,
                title_font=title_font,
                margin=margin,
                logo=logo,
            )

        images.append(image)

    if not images:
        logger.warning('No images found in history to create GIF')
        return

    # Save the GIF: first frame plus all remaining frames, looping forever.
    images[0].save(
        output_path,
        save_all=True,
        append_images=images[1:],
        duration=duration,
        loop=0,
        optimize=False,
    )
    logger.info(f'Created GIF at {output_path}')
230347

231348
async def run(self, max_steps: int = 100) -> AgentHistoryList:
232349
"""Execute the task with maximum number of steps"""
@@ -283,3 +400,6 @@ async def run(self, max_steps: int = 100) -> AgentHistoryList:
283400

284401
if not self.injected_browser and self.browser:
285402
await self.browser.close()
403+
404+
if self.generate_gif:
405+
self.create_history_gif()

src/browser/custom_browser.py

Lines changed: 104 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@
44
# @ProjectName: browser-use-webui
55
# @FileName: browser.py
66

7+
import asyncio
8+
9+
from playwright.async_api import Browser as PlaywrightBrowser
10+
from playwright.async_api import (
11+
BrowserContext as PlaywrightBrowserContext,
12+
)
13+
from playwright.async_api import (
14+
Playwright,
15+
async_playwright,
16+
)
717
from browser_use.browser.browser import Browser
818
from browser_use.browser.context import BrowserContext, BrowserContextConfig
919
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
@@ -15,36 +25,102 @@
1525
logger = logging.getLogger(__name__)
1626

1727
class CustomBrowser(Browser):
    """Browser that hands out ``CustomBrowserContext`` objects.

    ``_setup_browser`` obtains the Playwright browser in one of three ways,
    chosen from ``self.config``: connect to a remote ``wss_url``, attach over
    CDP to a local Chrome started from ``chrome_instance_path``, or launch a
    fresh Chromium.
    """

    async def new_context(
        self,
        # NOTE(review): this default is evaluated once at definition time, so
        # all calls that omit ``config`` share the same object. Kept as-is to
        # leave the signature unchanged; confirm the config is never mutated.
        config: BrowserContextConfig = BrowserContextConfig()
    ) -> CustomBrowserContext:
        """Return a new ``CustomBrowserContext`` bound to this browser."""
        return CustomBrowserContext(config=config, browser=self)

    @staticmethod
    def _cdp_endpoint_ready(endpoint_url: str) -> bool:
        """Return True if ``endpoint_url``/json/version answers with HTTP 200."""
        import requests

        try:
            response = requests.get(f'{endpoint_url}/json/version', timeout=2)
        except (requests.ConnectionError, requests.Timeout):
            # Nothing listening, or it accepted the connection but did not
            # answer within the timeout: treat both as "not ready".
            return False
        return response.status_code == 200

    async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """Sets up and returns a Playwright Browser instance with anti-detection measures.

        Args:
            playwright: Started Playwright instance used to connect or launch.

        Returns:
            The connected or newly launched Playwright browser.

        Raises:
            RuntimeError: If Chrome was started from ``chrome_instance_path``
                but connecting to it over CDP failed.
            Exception: Any error raised while launching Chromium is logged
                and re-raised unchanged.
        """
        if self.config.wss_url:
            browser = await playwright.chromium.connect(self.config.wss_url)
            return browser

        if self.config.chrome_instance_path:
            import subprocess

            debug_port = 9222
            cdp_url = f'http://localhost:{debug_port}'
            loop = asyncio.get_running_loop()

            async def endpoint_ready() -> bool:
                # requests is blocking; run the probe in the default executor
                # so the event loop is not stalled for the 2 s timeout.
                return await loop.run_in_executor(None, self._cdp_endpoint_ready, cdp_url)

            # Check if browser is already running
            if await endpoint_ready():
                logger.info('Reusing existing Chrome instance')
                browser = await playwright.chromium.connect_over_cdp(
                    endpoint_url=cdp_url,
                    timeout=20000,  # 20 second timeout for connection
                )
                return browser
            logger.debug('No existing Chrome instance found, starting a new one')

            # Start a new Chrome instance; the process is intentionally left
            # running detached from this call.
            subprocess.Popen(
                [
                    self.config.chrome_instance_path,
                    f'--remote-debugging-port={debug_port}',
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Poll (up to 10 times, 1 s apart) until the debug endpoint
            # answers; the connection below is attempted regardless.
            for _ in range(10):
                if await endpoint_ready():
                    break
                await asyncio.sleep(1)

            try:
                browser = await playwright.chromium.connect_over_cdp(
                    endpoint_url=cdp_url,
                    timeout=20000,  # 20 second timeout for connection
                )
                return browser
            except Exception as e:
                logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
                raise RuntimeError(
                    ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
                ) from e

        try:
            disable_security_args = []
            if self.config.disable_security:
                disable_security_args = [
                    '--disable-web-security',
                    '--disable-site-isolation-trials',
                    '--disable-features=IsolateOrigins,site-per-process',
                ]

            browser = await playwright.chromium.launch(
                headless=self.config.headless,
                args=[
                    '--no-sandbox',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-infobars',
                    '--disable-background-timer-throttling',
                    '--disable-popup-blocking',
                    '--disable-backgrounding-occluded-windows',
                    '--disable-renderer-backgrounding',
                    '--disable-window-activation',
                    '--disable-focus-on-load',
                    '--no-first-run',
                    '--no-default-browser-check',
                    '--no-startup-window',
                    '--window-position=0,0',
                    # '--window-size=1280,1000',
                ]
                + disable_security_args
                + self.config.extra_chromium_args,
                proxy=self.config.proxy,
            )

            return browser
        except Exception as e:
            logger.error(f'Failed to initialize Playwright browser: {str(e)}')
            raise

src/browser/custom_context.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,17 @@ class CustomBrowserContext(BrowserContext):
2222
def __init__(
2323
self,
2424
browser: "Browser",
25-
config: BrowserContextConfig = BrowserContextConfig(),
26-
context: PlaywrightBrowserContext = None,
25+
config: BrowserContextConfig = BrowserContextConfig()
2726
):
2827
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
29-
self.context = context
30-
self._persistence_config = BrowserPersistenceConfig.from_env()
3128

3229
async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
3330
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
3431
# If we have a context, return it directly
35-
if self.context:
36-
return self.context
3732

3833
# Check if we should use existing context for persistence
39-
if self._persistence_config.persistent_session and len(browser.contexts) > 0:
40-
logger.info("Using existing persistent context")
34+
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
35+
# Connect to existing Chrome instance instead of creating new one
4136
context = browser.contexts[0]
4237
else:
4338
# Original code for creating new context
@@ -99,8 +94,3 @@ async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowser
9994
)
10095

10196
return context
102-
103-
async def close(self):
104-
"""Override close to respect persistence setting"""
105-
if not self._persistence_config.persistent_session:
106-
await super().close()

0 commit comments

Comments
 (0)