Skip to content

Commit c807f4d

Browse files
committed
add prohibited domains
1 parent b372bb7 commit c807f4d

File tree

3 files changed

+208
-43
lines changed

3 files changed

+208
-43
lines changed

browser_use/browser/profile.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
559559
default=None,
560560
description='List of allowed domains for navigation e.g. ["*.google.com", "https://example.com", "chrome-extension://*"]',
561561
)
562+
prohibited_domains: list[str] | None = Field(
563+
default=None,
564+
description='List of prohibited domains for navigation e.g. ["*.google.com", "https://example.com", "chrome-extension://*"]',
565+
)
562566
keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.')
563567

564568
# --- Proxy settings ---

browser_use/browser/watchdogs/security_watchdog.py

Lines changed: 58 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,6 @@ def _is_url_allowed(self, url: str) -> bool:
127127
Returns:
128128
True if the URL is allowed, False otherwise
129129
"""
130-
# If no allowed_domains specified, allow all URLs
131-
if not self.browser_session.browser_profile.allowed_domains:
132-
return True
133130

134131
# Always allow internal browser targets
135132
if url in ['about:blank', 'chrome://new-tab-page/', 'chrome://new-tab-page', 'chrome://newtab/']:
@@ -149,48 +146,66 @@ def _is_url_allowed(self, url: str) -> bool:
149146
if not host:
150147
return False
151148

152-
# Full URL for matching (scheme + host)
153-
full_url_pattern = f'{parsed.scheme}://{host}'
154-
155149
# Check each allowed domain pattern
156-
for pattern in self.browser_session.browser_profile.allowed_domains:
157-
# Handle glob patterns
158-
if '*' in pattern:
159-
self._log_glob_warning()
160-
import fnmatch
161-
162-
# Check if pattern matches the host
163-
if pattern.startswith('*.'):
164-
# Pattern like *.example.com should match subdomains and main domain
165-
domain_part = pattern[2:] # Remove *.
166-
if host == domain_part or host.endswith('.' + domain_part):
167-
# Only match http/https URLs for domain-only patterns
168-
if parsed.scheme in ['http', 'https']:
169-
return True
170-
elif pattern.endswith('/*'):
171-
# Pattern like brave://* should match any brave:// URL
172-
prefix = pattern[:-1] # Remove the * at the end
173-
if url.startswith(prefix):
174-
return True
175-
else:
176-
# Use fnmatch for other glob patterns
177-
if fnmatch.fnmatch(
178-
full_url_pattern if '://' in pattern else host,
179-
pattern,
180-
):
150+
if self.browser_session.browser_profile.allowed_domains:
151+
for pattern in self.browser_session.browser_profile.allowed_domains:
152+
if self._is_url_match(url, host, parsed.scheme, pattern):
153+
return True
154+
155+
return False
156+
157+
if self.browser_session.browser_profile.prohibited_domains:
158+
for pattern in self.browser_session.browser_profile.prohibited_domains:
159+
if self._is_url_match(url, host, parsed.scheme, pattern):
160+
return False
161+
162+
return True
163+
164+
return True
165+
166+
def _is_url_match(self, url: str, host: str, scheme: str, pattern: str) -> bool:
    """Return True when ``url`` matches one allow/prohibit ``pattern``.

    Pattern forms, checked in this order:

    * ``*.example.com`` -- the bare domain and any subdomain, http/https only.
    * ``scheme://.../*`` (e.g. ``brave://*``) -- any URL starting with the
      text before the trailing ``*``.
    * any other pattern containing ``*`` -- shell-style glob against
      ``scheme://host`` when the pattern contains ``://``, else against
      ``host``.
    * ``scheme://host[...]`` without ``*`` -- URL prefix match. When the
      pattern stops inside the authority (no path), the URL's authority
      must stop there too, optionally followed by a numeric port.
    * ``example.com`` -- exact host; ``www.<pattern>`` is also accepted when
      ``self._is_root_domain(pattern)`` is truthy.

    Host comparisons are case-insensitive in every branch.

    Args:
        url: The full URL being checked.
        host: Host component of ``url`` as extracted by the caller.
        scheme: Scheme component of ``url`` as extracted by the caller.
        pattern: One entry of the allowed/prohibited domain list.

    Returns:
        True if ``url`` matches ``pattern``, False otherwise.
    """
    import fnmatch

    host_lc = host.lower()
    pattern_lc = pattern.lower()

    if '*' in pattern:
        self._log_glob_warning()

        if pattern.startswith('*.'):
            # Domain-only wildcard: never applies to non-web schemes.
            if scheme not in ('http', 'https'):
                return False
            domain_part = pattern_lc[2:]  # drop the leading "*."
            return host_lc == domain_part or host_lc.endswith('.' + domain_part)

        if pattern.endswith('/*'):
            # e.g. "brave://*" -> prefix "brave://"
            return url.startswith(pattern[:-1])

        # fnmatch.fnmatch() normalises case per-OS; fnmatchcase() on
        # lower-cased operands gives the same answer on every platform.
        subject = f'{scheme}://{host}' if '://' in pattern else host
        return fnmatch.fnmatchcase(subject.lower(), pattern_lc)

    if '://' in pattern:
        if not url.startswith(pattern):
            return False
        if '/' in pattern.split('://', 1)[1]:
            # Pattern already reaches into the path: plain prefix semantics.
            return True
        # Pattern ends inside the authority. A bare startswith() would let
        # "https://wiki.org" match "https://wiki.org.evil.com" or
        # "https://wiki.org@evil.com", so require an authority boundary.
        remainder = url[len(pattern):]
        if not remainder or remainder[0] in '/?#':
            return True
        if remainder[0] != ':':
            return False
        # ":..." is acceptable only as a port, not as "user:password@host".
        port = remainder[1:]
        for separator in '/?#':
            port = port.partition(separator)[0]
        return not port or (port.isascii() and port.isdigit())

    # Domain-only pattern.
    if host_lc == pattern_lc:
        return True
    return bool(self._is_root_domain(pattern)) and host_lc == f'www.{pattern_lc}'

tests/ci/test_browser_watchdog_security2.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,3 +287,149 @@ def test_is_root_domain_helper(self):
287287
# Invalid domains - should return False
288288
assert watchdog._is_root_domain('example') is False
289289
assert watchdog._is_root_domain('') is False
290+
291+
292+
class TestUrlProhibitlistSecurity:
    """Tests for URL prohibitlist (blocked domains) behavior and matching semantics."""

    @staticmethod
    def _make_watchdog(**profile_kwargs):
        """Build a SecurityWatchdog over a headless session with no user data dir.

        ``profile_kwargs`` are forwarded to ``BrowserProfile`` (for example
        ``prohibited_domains=[...]`` and/or ``allowed_domains=[...]``).
        """
        from bubus import EventBus

        from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog

        browser_profile = BrowserProfile(headless=True, user_data_dir=None, **profile_kwargs)
        browser_session = BrowserSession(browser_profile=browser_profile)
        return SecurityWatchdog(browser_session=browser_session, event_bus=EventBus())

    def test_simple_prohibited_domains(self):
        """Domain-only patterns block exact host and www, but not other subdomains."""
        watchdog = self._make_watchdog(prohibited_domains=['example.com', 'test.org'])

        # Block exact and www
        assert watchdog._is_url_allowed('https://example.com') is False
        assert watchdog._is_url_allowed('https://www.example.com') is False
        assert watchdog._is_url_allowed('https://test.org') is False
        assert watchdog._is_url_allowed('https://www.test.org') is False

        # Allow other subdomains when only root is prohibited
        assert watchdog._is_url_allowed('https://mail.example.com') is True
        assert watchdog._is_url_allowed('https://api.test.org') is True

        # Allow unrelated domains
        assert watchdog._is_url_allowed('https://notexample.com') is True

    def test_glob_pattern_prohibited(self):
        """Wildcard patterns block subdomains and main domain for http/https only."""
        watchdog = self._make_watchdog(prohibited_domains=['*.example.com'])

        # Block subdomains and main domain
        assert watchdog._is_url_allowed('https://example.com') is False
        assert watchdog._is_url_allowed('https://www.example.com') is False
        assert watchdog._is_url_allowed('https://mail.example.com') is False

        # Allow other domains
        assert watchdog._is_url_allowed('https://notexample.com') is True

        # Wildcard with domain-only should not apply to non-http(s)
        assert watchdog._is_url_allowed('chrome://abc.example.com') is True

    def test_full_url_prohibited_patterns(self):
        """Full URL patterns block only matching scheme/host/prefix."""
        watchdog = self._make_watchdog(prohibited_domains=['https://wiki.org', 'brave://*'])

        # Scheme-specific blocking
        assert watchdog._is_url_allowed('http://wiki.org') is True
        assert watchdog._is_url_allowed('https://wiki.org') is False
        assert watchdog._is_url_allowed('https://wiki.org/path') is False

        # Internal URL prefix blocking
        assert watchdog._is_url_allowed('brave://anything/') is False
        assert watchdog._is_url_allowed('chrome://settings') is True

    def test_internal_urls_allowed_even_when_prohibited(self):
        """Internal new-tab/blank URLs are always allowed regardless of prohibited list."""
        watchdog = self._make_watchdog(prohibited_domains=['*'])

        for internal_url in (
            'about:blank',
            'chrome://new-tab-page/',
            'chrome://new-tab-page',
            'chrome://newtab/',
        ):
            assert watchdog._is_url_allowed(internal_url) is True

    def test_prohibited_ignored_when_allowlist_present(self):
        """When allowlist is set, prohibited list is ignored by design."""
        watchdog = self._make_watchdog(
            allowed_domains=['*.example.com'],
            prohibited_domains=['https://example.com'],
        )

        # Allowed by allowlist even though exact URL is in prohibited list
        assert watchdog._is_url_allowed('https://example.com') is True
        assert watchdog._is_url_allowed('https://www.example.com') is True

        # The wildcard allowlist entry covers every other subdomain as well
        assert watchdog._is_url_allowed('https://api.example.com') is True

        # A domain outside the allowlist is blocked (the prohibited list is
        # not consulted in this mode)
        assert watchdog._is_url_allowed('https://notexample.com') is False

    def test_auth_credentials_do_not_cause_false_block(self):
        """Credentials injection with prohibited domain in username should not block unrelated hosts."""
        watchdog = self._make_watchdog(prohibited_domains=['example.com'])

        # NOTE(review): the userinfo part of these URLs was mangled by e-mail
        # obfuscation in the rendered commit page and has been reconstructed
        # (in particular the %20 / %3A variants) -- confirm against the repo.
        # Host is malicious.com, should not be blocked just because username contains example.com
        assert watchdog._is_url_allowed('https://example.com:password@malicious.com') is True
        assert watchdog._is_url_allowed('https://example.com@malicious.com') is True
        assert watchdog._is_url_allowed('https://example.com%20@malicious.com') is True
        assert watchdog._is_url_allowed('https://example.com%3A@malicious.com') is True

        # Legitimate credentials to a prohibited host should be blocked
        assert watchdog._is_url_allowed('https://user:password@example.com') is False

    def test_case_insensitive_prohibited_domains(self):
        """Prohibited domain matching should be case-insensitive."""
        watchdog = self._make_watchdog(prohibited_domains=['Example.COM'])

        assert watchdog._is_url_allowed('https://example.com') is False
        assert watchdog._is_url_allowed('https://WWW.EXAMPLE.COM') is False
        assert watchdog._is_url_allowed('https://mail.example.com') is True

0 commit comments

Comments
 (0)