Skip to content

Commit 122feb9

Browse files
authored
feat: Support Prohibited Domains (#3115)
This PR adds support for a list of prohibited domains.

<!-- This is an auto-generated description by cubic. -->

---

## Summary by cubic

Adds a `prohibited_domains` option to block navigation to specific domains and URL patterns in the SecurityWatchdog. The allowlist still takes precedence, and internal new-tab/blank pages remain allowed.

- **New Features**
  - BrowserProfile: new `prohibited_domains` list.
  - SecurityWatchdog: uses the allowlist if present; otherwise applies the prohibitlist; defaults to allow when neither is set.
  - Pattern support: exact domains (case-insensitive, also blocks `www`), wildcard subdomains (`*.domain.com` for http/https), and full URL/prefix patterns (e.g., `https://host`, `brave://*`). Ignores credentials in the URL when matching the host. Always allows `about:blank` and Chrome new-tab pages.
- **Refactors**
  - Extracted `_is_url_match` for shared pattern matching.
  - Added tests for precedence (allowlist over prohibitlist), wildcard and scheme rules, internal URL exceptions, and credential edge cases.

<!-- End of auto-generated description by cubic. -->
2 parents b372bb7 + 48c96b6 commit 122feb9

File tree

3 files changed

+214
-41
lines changed

3 files changed

+214
-41
lines changed

browser_use/browser/profile.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
559559
default=None,
560560
description='List of allowed domains for navigation e.g. ["*.google.com", "https://example.com", "chrome-extension://*"]',
561561
)
562+
prohibited_domains: list[str] | None = Field(
563+
default=None,
564+
description='List of prohibited domains for navigation e.g. ["*.google.com", "https://example.com", "chrome-extension://*"]. Allowed domains take precedence over prohibited domains.',
565+
)
562566
keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.')
563567

564568
# --- Proxy settings ---

browser_use/browser/watchdogs/security_watchdog.py

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,12 @@ def _is_url_allowed(self, url: str) -> bool:
127127
Returns:
128128
True if the URL is allowed, False otherwise
129129
"""
130+
130131
# If neither allowed_domains nor prohibited_domains is specified, allow all URLs
131-
if not self.browser_session.browser_profile.allowed_domains:
132+
if (
133+
not self.browser_session.browser_profile.allowed_domains
134+
and not self.browser_session.browser_profile.prohibited_domains
135+
):
132136
return True
133137

134138
# Always allow internal browser targets
@@ -149,48 +153,67 @@ def _is_url_allowed(self, url: str) -> bool:
149153
if not host:
150154
return False
151155

152-
# Full URL for matching (scheme + host)
153-
full_url_pattern = f'{parsed.scheme}://{host}'
154-
155156
# Check each allowed domain pattern
156-
for pattern in self.browser_session.browser_profile.allowed_domains:
157-
# Handle glob patterns
158-
if '*' in pattern:
159-
self._log_glob_warning()
160-
import fnmatch
161-
162-
# Check if pattern matches the host
163-
if pattern.startswith('*.'):
164-
# Pattern like *.example.com should match subdomains and main domain
165-
domain_part = pattern[2:] # Remove *.
166-
if host == domain_part or host.endswith('.' + domain_part):
167-
# Only match http/https URLs for domain-only patterns
168-
if parsed.scheme in ['http', 'https']:
169-
return True
170-
elif pattern.endswith('/*'):
171-
# Pattern like brave://* should match any brave:// URL
172-
prefix = pattern[:-1] # Remove the * at the end
173-
if url.startswith(prefix):
174-
return True
175-
else:
176-
# Use fnmatch for other glob patterns
177-
if fnmatch.fnmatch(
178-
full_url_pattern if '://' in pattern else host,
179-
pattern,
180-
):
157+
if self.browser_session.browser_profile.allowed_domains:
158+
for pattern in self.browser_session.browser_profile.allowed_domains:
159+
if self._is_url_match(url, host, parsed.scheme, pattern):
160+
return True
161+
162+
return False
163+
164+
# Check each prohibited domain pattern
165+
if self.browser_session.browser_profile.prohibited_domains:
166+
for pattern in self.browser_session.browser_profile.prohibited_domains:
167+
if self._is_url_match(url, host, parsed.scheme, pattern):
168+
return False
169+
170+
return True
171+
172+
return True
173+
174+
def _is_url_match(self, url: str, host: str, scheme: str, pattern: str) -> bool:
175+
"""Check if a URL matches a pattern."""
176+
177+
# Full URL for matching (scheme + host)
178+
full_url_pattern = f'{scheme}://{host}'
179+
180+
# Handle glob patterns
181+
if '*' in pattern:
182+
self._log_glob_warning()
183+
import fnmatch
184+
185+
# Check if pattern matches the host
186+
if pattern.startswith('*.'):
187+
# Pattern like *.example.com should match subdomains and main domain
188+
domain_part = pattern[2:] # Remove *.
189+
if host == domain_part or host.endswith('.' + domain_part):
190+
# Only match http/https URLs for domain-only patterns
191+
if scheme in ['http', 'https']:
181192
return True
193+
elif pattern.endswith('/*'):
194+
# Pattern like brave://* should match any brave:// URL
195+
prefix = pattern[:-1] # Remove the * at the end
196+
if url.startswith(prefix):
197+
return True
182198
else:
183-
# Exact match
184-
if '://' in pattern:
185-
# Full URL pattern
186-
if url.startswith(pattern):
187-
return True
188-
else:
189-
# Domain-only pattern (case-insensitive comparison)
190-
if host.lower() == pattern.lower():
191-
return True
192-
# If pattern is a root domain, also check www subdomain
193-
if self._is_root_domain(pattern) and host.lower() == f'www.{pattern.lower()}':
194-
return True
199+
# Use fnmatch for other glob patterns
200+
if fnmatch.fnmatch(
201+
full_url_pattern if '://' in pattern else host,
202+
pattern,
203+
):
204+
return True
205+
else:
206+
# Exact match
207+
if '://' in pattern:
208+
# Full URL pattern
209+
if url.startswith(pattern):
210+
return True
211+
else:
212+
# Domain-only pattern (case-insensitive comparison)
213+
if host.lower() == pattern.lower():
214+
return True
215+
# If pattern is a root domain, also check www subdomain
216+
if self._is_root_domain(pattern) and host.lower() == f'www.{pattern.lower()}':
217+
return True
195218

196219
return False

tests/ci/test_browser_watchdog_security2.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,3 +287,149 @@ def test_is_root_domain_helper(self):
287287
# Invalid domains - should return False
288288
assert watchdog._is_root_domain('example') is False
289289
assert watchdog._is_root_domain('') is False
290+
291+
292+
class TestUrlProhibitlistSecurity:
293+
"""Tests for URL prohibitlist (blocked domains) behavior and matching semantics."""
294+
295+
def test_simple_prohibited_domains(self):
296+
"""Domain-only patterns block exact host and www, but not other subdomains."""
297+
from bubus import EventBus
298+
299+
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
300+
301+
browser_profile = BrowserProfile(prohibited_domains=['example.com', 'test.org'], headless=True, user_data_dir=None)
302+
browser_session = BrowserSession(browser_profile=browser_profile)
303+
event_bus = EventBus()
304+
watchdog = SecurityWatchdog(browser_session=browser_session, event_bus=event_bus)
305+
306+
# Block exact and www
307+
assert watchdog._is_url_allowed('https://example.com') is False
308+
assert watchdog._is_url_allowed('https://www.example.com') is False
309+
assert watchdog._is_url_allowed('https://test.org') is False
310+
assert watchdog._is_url_allowed('https://www.test.org') is False
311+
312+
# Allow other subdomains when only root is prohibited
313+
assert watchdog._is_url_allowed('https://mail.example.com') is True
314+
assert watchdog._is_url_allowed('https://api.test.org') is True
315+
316+
# Allow unrelated domains
317+
assert watchdog._is_url_allowed('https://notexample.com') is True
318+
319+
def test_glob_pattern_prohibited(self):
320+
"""Wildcard patterns block subdomains and main domain for http/https only."""
321+
from bubus import EventBus
322+
323+
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
324+
325+
browser_profile = BrowserProfile(prohibited_domains=['*.example.com'], headless=True, user_data_dir=None)
326+
browser_session = BrowserSession(browser_profile=browser_profile)
327+
event_bus = EventBus()
328+
watchdog = SecurityWatchdog(browser_session=browser_session, event_bus=event_bus)
329+
330+
# Block subdomains and main domain
331+
assert watchdog._is_url_allowed('https://example.com') is False
332+
assert watchdog._is_url_allowed('https://www.example.com') is False
333+
assert watchdog._is_url_allowed('https://mail.example.com') is False
334+
335+
# Allow other domains
336+
assert watchdog._is_url_allowed('https://notexample.com') is True
337+
338+
# Wildcard with domain-only should not apply to non-http(s)
339+
assert watchdog._is_url_allowed('chrome://abc.example.com') is True
340+
341+
def test_full_url_prohibited_patterns(self):
342+
"""Full URL patterns block only matching scheme/host/prefix."""
343+
from bubus import EventBus
344+
345+
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
346+
347+
browser_profile = BrowserProfile(prohibited_domains=['https://wiki.org', 'brave://*'], headless=True, user_data_dir=None)
348+
browser_session = BrowserSession(browser_profile=browser_profile)
349+
event_bus = EventBus()
350+
watchdog = SecurityWatchdog(browser_session=browser_session, event_bus=event_bus)
351+
352+
# Scheme-specific blocking
353+
assert watchdog._is_url_allowed('http://wiki.org') is True
354+
assert watchdog._is_url_allowed('https://wiki.org') is False
355+
assert watchdog._is_url_allowed('https://wiki.org/path') is False
356+
357+
# Internal URL prefix blocking
358+
assert watchdog._is_url_allowed('brave://anything/') is False
359+
assert watchdog._is_url_allowed('chrome://settings') is True
360+
361+
def test_internal_urls_allowed_even_when_prohibited(self):
362+
"""Internal new-tab/blank URLs are always allowed regardless of prohibited list."""
363+
from bubus import EventBus
364+
365+
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
366+
367+
browser_profile = BrowserProfile(prohibited_domains=['*'], headless=True, user_data_dir=None)
368+
browser_session = BrowserSession(browser_profile=browser_profile)
369+
event_bus = EventBus()
370+
watchdog = SecurityWatchdog(browser_session=browser_session, event_bus=event_bus)
371+
372+
assert watchdog._is_url_allowed('about:blank') is True
373+
assert watchdog._is_url_allowed('chrome://new-tab-page/') is True
374+
assert watchdog._is_url_allowed('chrome://new-tab-page') is True
375+
assert watchdog._is_url_allowed('chrome://newtab/') is True
376+
377+
def test_prohibited_ignored_when_allowlist_present(self):
378+
"""When allowlist is set, prohibited list is ignored by design."""
379+
from bubus import EventBus
380+
381+
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
382+
383+
browser_profile = BrowserProfile(
384+
allowed_domains=['*.example.com'],
385+
prohibited_domains=['https://example.com'],
386+
headless=True,
387+
user_data_dir=None,
388+
)
389+
browser_session = BrowserSession(browser_profile=browser_profile)
390+
event_bus = EventBus()
391+
watchdog = SecurityWatchdog(browser_session=browser_session, event_bus=event_bus)
392+
393+
# Allowed by allowlist even though exact URL is in prohibited list
394+
assert watchdog._is_url_allowed('https://example.com') is True
395+
assert watchdog._is_url_allowed('https://www.example.com') is True
396+
397+
# Subdomains matched by the wildcard allowlist are allowed (prohibited list is not consulted in this mode)
398+
assert watchdog._is_url_allowed('https://api.example.com') is True # wildcard allowlist includes this
399+
# A domain outside the allowlist should be blocked
400+
assert watchdog._is_url_allowed('https://notexample.com') is False
401+
402+
def test_auth_credentials_do_not_cause_false_block(self):
403+
"""Credentials injection with prohibited domain in username should not block unrelated hosts."""
404+
from bubus import EventBus
405+
406+
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
407+
408+
browser_profile = BrowserProfile(prohibited_domains=['example.com'], headless=True, user_data_dir=None)
409+
browser_session = BrowserSession(browser_profile=browser_profile)
410+
event_bus = EventBus()
411+
watchdog = SecurityWatchdog(browser_session=browser_session, event_bus=event_bus)
412+
413+
# Host is malicious.com, should not be blocked just because username contains example.com
414+
assert watchdog._is_url_allowed('https://example.com:[email protected]') is True
415+
assert watchdog._is_url_allowed('https://[email protected]') is True
416+
assert watchdog._is_url_allowed('https://example.com%[email protected]') is True
417+
assert watchdog._is_url_allowed('https://example.com%[email protected]') is True
418+
419+
# Legitimate credentials to a prohibited host should be blocked
420+
assert watchdog._is_url_allowed('https://user:[email protected]') is False
421+
422+
def test_case_insensitive_prohibited_domains(self):
423+
"""Prohibited domain matching should be case-insensitive."""
424+
from bubus import EventBus
425+
426+
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
427+
428+
browser_profile = BrowserProfile(prohibited_domains=['Example.COM'], headless=True, user_data_dir=None)
429+
browser_session = BrowserSession(browser_profile=browser_profile)
430+
event_bus = EventBus()
431+
watchdog = SecurityWatchdog(browser_session=browser_session, event_bus=event_bus)
432+
433+
assert watchdog._is_url_allowed('https://example.com') is False
434+
assert watchdog._is_url_allowed('https://WWW.EXAMPLE.COM') is False
435+
assert watchdog._is_url_allowed('https://mail.example.com') is True

0 commit comments

Comments
 (0)