Skip to content

Commit fd1e5fd

Browse files
authored
feat: automatically add www for allowed domains (#3101)
<!-- This is an auto-generated description by cubic. -->
## Summary by cubic

Automatically allow the `www` subdomain when a root domain is listed in `allowed_domains`. This resolves flows that bounce between root and www (e.g., auth redirects) without broadening domain scope.

- New Features
  - Added `_is_root_domain` helper (1 dot, no wildcard/protocol) to detect simple root domains.
  - `_is_url_allowed` now permits `www.<domain>` when the root domain is allowed.
  - Does not apply to multi-dot ccTLDs (e.g., `example.co.uk`), existing subdomains, wildcards (`*.example.com`), or full URL patterns.
  - Added tests covering root vs www, ccTLDs, subdomains, wildcards, and full URL patterns.

<!-- End of auto-generated description by cubic. -->
2 parents b3231c5 + a1847d7 commit fd1e5fd

File tree

2 files changed

+176
-2
lines changed

2 files changed

+176
-2
lines changed

browser_use/browser/watchdogs/security_watchdog.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,24 @@ async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
9090
except Exception as e:
9191
self.logger.error(f'⛔️ Failed to close new tab with non-allowed URL: {type(e).__name__} {e}')
9292

93+
def _is_root_domain(self, domain: str) -> bool:
    """Check if a domain is a simple root domain (no subdomain present).

    Simple heuristic: only a pattern made of exactly two non-empty labels
    separated by a single dot (``domain.tld``) counts as a root domain.
    For complex cases like country TLDs (``example.co.uk``) or subdomains,
    users should configure the www variant explicitly.

    Args:
        domain: The domain pattern to check.

    Returns:
        True if it's a simple root domain, False otherwise.
    """
    # Skip if it contains wildcards or protocol
    if '*' in domain or '://' in domain:
        return False

    # Exactly one dot AND text on both sides of it: this rejects '',
    # 'example', and malformed patterns such as '.com', 'example.' or '.'
    # that a bare dot count would wrongly accept.
    labels = domain.split('.')
    return len(labels) == 2 and all(labels)
110+
93111
def _log_glob_warning(self) -> None:
94112
"""Log a warning about glob patterns in allowed_domains."""
95113
global _GLOB_WARNING_SHOWN
@@ -168,8 +186,11 @@ def _is_url_allowed(self, url: str) -> bool:
168186
if url.startswith(pattern):
169187
return True
170188
else:
171-
# Domain-only pattern
172-
if host == pattern:
189+
# Domain-only pattern (case-insensitive comparison)
190+
if host.lower() == pattern.lower():
191+
return True
192+
# If pattern is a root domain, also check www subdomain
193+
if self._is_root_domain(pattern) and host.lower() == f'www.{pattern.lower()}':
173194
return True
174195

175196
return False

tests/ci/test_browser_watchdog_security2.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,156 @@ def test_glob_pattern_edge_cases(self):
134134
# Shouldn't match potentially malicious domains with a similar structure
135135
# This demonstrates why the previous pattern was risky and why it's now rejected
136136
assert watchdog._is_url_allowed('https://www.google.evil.com') is False
137+
138+
def test_automatic_www_subdomain_addition(self):
    """A root domain in allowed_domains also admits its www subdomain, and nothing else."""
    from bubus import EventBus

    from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog

    # Two simple root domains in the allow-list
    profile = BrowserProfile(allowed_domains=['example.com', 'test.org'], headless=True, user_data_dir=None)
    session = BrowserSession(browser_profile=profile)
    bus = EventBus()
    guard = SecurityWatchdog(browser_session=session, event_bus=bus)

    # URL -> expected verdict, checked in the listed order
    expected_by_url = {
        # The root domains themselves
        'https://example.com': True,
        'https://test.org': True,
        # www is implied by the root domain
        'https://www.example.com': True,
        'https://www.test.org': True,
        # Any other subdomain stays blocked
        'https://mail.example.com': False,
        'https://sub.test.org': False,
        # Unrelated domains stay blocked, with or without www
        'https://notexample.com': False,
        'https://www.notexample.com': False,
    }
    for url, expected in expected_by_url.items():
        assert guard._is_url_allowed(url) is expected
165+
166+
def test_www_subdomain_not_added_for_country_tlds(self):
    """Domains with 2+ dots (country-specific TLDs) do NOT get an implicit www subdomain."""
    from bubus import EventBus

    from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog

    # Country-specific TLD entries: more than one dot each
    profile = BrowserProfile(
        allowed_domains=['example.co.uk', 'test.com.au', 'site.co.jp'], headless=True, user_data_dir=None
    )
    session = BrowserSession(browser_profile=profile)
    bus = EventBus()
    guard = SecurityWatchdog(browser_session=session, event_bus=bus)

    # URL -> expected verdict, checked in the listed order
    expected_by_url = {
        # Entries match exactly as written
        'https://example.co.uk': True,
        'https://test.com.au': True,
        'https://site.co.jp': True,
        # www must be listed explicitly by the user
        'https://www.example.co.uk': False,
        'https://www.test.com.au': False,
        'https://www.site.co.jp': False,
        # Other subdomains are blocked as well
        'https://mail.example.co.uk': False,
        'https://api.test.com.au': False,
    }
    for url, expected in expected_by_url.items():
        assert guard._is_url_allowed(url) is expected
193+
194+
def test_www_subdomain_not_added_for_existing_subdomains(self):
    """An allow-list entry that already names a subdomain gets no implicit www prefix."""
    from bubus import EventBus

    from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog

    # Entries that already carry a subdomain
    profile = BrowserProfile(allowed_domains=['mail.example.com', 'api.test.org'], headless=True, user_data_dir=None)
    session = BrowserSession(browser_profile=profile)
    bus = EventBus()
    guard = SecurityWatchdog(browser_session=session, event_bus=bus)

    # URL -> expected verdict, checked in the listed order
    expected_by_url = {
        # The exact subdomain is allowed
        'https://mail.example.com': True,
        'https://api.test.org': True,
        # www is not prepended to a subdomain entry
        'https://www.mail.example.com': False,
        'https://www.api.test.org': False,
        # The parent root domains are not allowed either
        'https://example.com': False,
        'https://test.org': False,
    }
    for url, expected in expected_by_url.items():
        assert guard._is_url_allowed(url) is expected
217+
218+
def test_www_subdomain_not_added_for_wildcard_patterns(self):
    """Wildcard entries are handled by wildcard matching, not by the automatic www logic."""
    from bubus import EventBus

    from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog

    # A single wildcard entry
    profile = BrowserProfile(allowed_domains=['*.example.com'], headless=True, user_data_dir=None)
    session = BrowserSession(browser_profile=profile)
    bus = EventBus()
    guard = SecurityWatchdog(browser_session=session, event_bus=bus)

    # The wildcard is expected to admit the root, www and any other subdomain
    for url in (
        'https://example.com',
        'https://www.example.com',
        'https://mail.example.com',
    ):
        assert guard._is_url_allowed(url) is True
234+
235+
def test_www_subdomain_not_added_for_url_patterns(self):
    """Full URL entries (with a protocol) do not get an implicit www variant."""
    from bubus import EventBus

    from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog

    # Entries written as full URLs rather than bare domains
    profile = BrowserProfile(
        allowed_domains=['https://example.com', 'http://test.org'], headless=True, user_data_dir=None
    )
    session = BrowserSession(browser_profile=profile)
    bus = EventBus()
    guard = SecurityWatchdog(browser_session=session, event_bus=bus)

    # URL -> expected verdict, checked in the listed order
    expected_by_url = {
        # URLs under the listed patterns are allowed
        'https://example.com/path': True,
        'http://test.org/page': True,
        # The www variants are not
        'https://www.example.com': False,
        'http://www.test.org': False,
    }
    for url, expected in expected_by_url.items():
        assert guard._is_url_allowed(url) is expected
256+
257+
def test_is_root_domain_helper(self):
    """Exercise _is_root_domain directly across valid, nested, wildcard, URL and invalid inputs."""
    from bubus import EventBus

    from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog

    profile = BrowserProfile(allowed_domains=['example.com'], headless=True, user_data_dir=None)
    session = BrowserSession(browser_profile=profile)
    bus = EventBus()
    guard = SecurityWatchdog(browser_session=session, event_bus=bus)

    # Simple root domains (exactly 1 dot) are accepted
    accepted = ('example.com', 'test.org', 'site.net')
    for candidate in accepted:
        assert guard._is_root_domain(candidate) is True

    rejected = (
        # Subdomains and country TLDs (more than 1 dot)
        'www.example.com',
        'mail.example.com',
        'example.co.uk',
        'test.com.au',
        # Wildcards
        '*.example.com',
        '*example.com',
        # Full URLs
        'https://example.com',
        'http://test.org',
        # Invalid domains
        'example',
        '',
    )
    for candidate in rejected:
        assert guard._is_root_domain(candidate) is False

0 commit comments

Comments
 (0)