|
9 | 9 | from random import randint |
10 | 10 | from tempfile import TemporaryDirectory |
11 | 11 | from typing import Any, Awaitable, Callable, Optional, overload |
| 12 | +from urllib.parse import urlsplit, urlunsplit |
12 | 13 |
|
13 | 14 | from pydoll.browser.interfaces import BrowserOptionsManager |
14 | 15 | from pydoll.browser.managers import ( |
@@ -93,6 +94,7 @@ def __init__( |
93 | 94 | self._connection_handler = ConnectionHandler(self._connection_port) |
94 | 95 | self._backup_preferences_dir = '' |
95 | 96 | self._tabs_opened: dict[str, Tab] = {} |
| 97 | + self._context_proxy_auth: dict[str, tuple[str, str]] = {} |
96 | 98 |
|
97 | 99 | async def __aenter__(self) -> 'Browser': |
98 | 100 | """Async context manager entry.""" |
@@ -203,13 +205,22 @@ async def create_browser_context( |
203 | 205 | Returns: |
204 | 206 | Browser context ID for use with other methods. |
205 | 207 | """ |
| 208 | + # If proxy_server contains credentials, strip them and store per-context auth |
| 209 | + sanitized_proxy = proxy_server |
| 210 | + extracted_auth: Optional[tuple[str, str]] = None |
| 211 | + if proxy_server: |
| 212 | + sanitized_proxy, extracted_auth = self._sanitize_proxy_and_extract_auth(proxy_server) |
| 213 | + |
206 | 214 | response: CreateBrowserContextResponse = await self._execute_command( |
207 | 215 | TargetCommands.create_browser_context( |
208 | | - proxy_server=proxy_server, |
| 216 | + proxy_server=sanitized_proxy, |
209 | 217 | proxy_bypass_list=proxy_bypass_list, |
210 | 218 | ) |
211 | 219 | ) |
212 | | - return response['result']['browserContextId'] |
| 220 | + context_id = response['result']['browserContextId'] |
| 221 | + if extracted_auth: |
| 222 | + self._context_proxy_auth[context_id] = extracted_auth |
| 223 | + return context_id |
213 | 224 |
|
214 | 225 | async def delete_browser_context(self, browser_context_id: str): |
215 | 226 | """ |
@@ -251,8 +262,8 @@ async def new_tab(self, url: str = '', browser_context_id: Optional[str] = None) |
251 | 262 | target_id = response['result']['targetId'] |
252 | 263 | tab = Tab(self, **self._get_tab_kwargs(target_id, browser_context_id)) |
253 | 264 | self._tabs_opened[target_id] = tab |
254 | | - if url: |
255 | | - await tab.go_to(url) |
| 265 | + await self._setup_context_proxy_auth_for_tab(tab, browser_context_id) |
| 266 | + if url: await tab.go_to(url) |
256 | 267 | return tab |
257 | 268 |
|
258 | 269 | async def get_targets(self) -> list[TargetInfo]: |
@@ -577,6 +588,60 @@ async def _continue_request_with_auth_callback( |
577 | 588 | await self.disable_fetch_events() |
578 | 589 | return response |
579 | 590 |
|
@staticmethod
async def _tab_continue_request_callback(event: RequestPausedEvent, tab: Tab):
    """
    Resume a paused request through the given tab.

    Args:
        event: Paused-request event; the request id is read from
            ``event['params']['requestId']``.
        tab: Tab whose ``continue_request`` is awaited.

    Returns:
        Whatever ``tab.continue_request`` returns.
    """
    return await tab.continue_request(event['params']['requestId'])
| 596 | + |
@staticmethod
async def _tab_continue_request_with_auth_callback(
    event: RequestPausedEvent,
    tab: Tab,
    proxy_username: Optional[str],
    proxy_password: Optional[str],
):
    """
    Answer an auth challenge on a tab with the supplied credentials.

    Args:
        event: Event carrying the paused request; its id is read from
            ``event['params']['requestId']``.
        tab: Tab used to send the auth response.
        proxy_username: Username forwarded to ``tab.continue_with_auth``.
        proxy_password: Password forwarded to ``tab.continue_with_auth``.

    Returns:
        The response of ``tab.continue_with_auth``.
    """
    auth_result: Response = await tab.continue_with_auth(
        request_id=event['params']['requestId'],
        auth_challenge_response=AuthChallengeResponseType.PROVIDE_CREDENTIALS,
        proxy_username=proxy_username,
        proxy_password=proxy_password,
    )
    # Fetch events are turned off on this tab right after the credentials are sent.
    await tab.disable_fetch_events()
    return auth_result
| 614 | + |
async def _setup_context_proxy_auth_for_tab(
    self, tab: Tab, browser_context_id: Optional[str]
) -> None:
    """
    Wire up proxy authentication on a tab when its context has stored credentials.

    Does nothing when ``browser_context_id`` is falsy or when
    ``self._context_proxy_auth`` holds no entry for it.

    Args:
        tab: Tab on which fetch events are enabled and callbacks registered.
        browser_context_id: Key used to look up ``(username, password)`` in
            ``self._context_proxy_auth``.
    """
    stored_auth = (
        self._context_proxy_auth.get(browser_context_id) if browser_context_id else None
    )
    if not stored_auth:
        return

    proxy_user, proxy_pass = stored_auth
    resume_handler = partial(self._tab_continue_request_callback, tab=tab)
    auth_handler = partial(
        self._tab_continue_request_with_auth_callback,
        tab=tab,
        proxy_username=proxy_user,
        proxy_password=proxy_pass,
    )

    # Interception is enabled first, then both callbacks are registered as temporary.
    await tab.enable_fetch_events(handle_auth=True)
    await tab.on(FetchEvent.REQUEST_PAUSED, resume_handler, temporary=True)
    await tab.on(FetchEvent.AUTH_REQUIRED, auth_handler, temporary=True)
| 644 | + |
580 | 645 | async def _verify_browser_running(self): |
581 | 646 | """ |
582 | 647 | Verify browser started successfully. |
@@ -763,6 +828,49 @@ def _get_tab_ws_address(self, tab_id: str) -> str: |
763 | 828 | ws_domain = '/'.join(self._ws_address.split('/')[:3]) |
764 | 829 | return f'{ws_domain}/devtools/page/{tab_id}' |
765 | 830 |
|
| 831 | + @staticmethod |
| 832 | + def _sanitize_proxy_and_extract_auth( |
| 833 | + proxy_server: str, |
| 834 | + ) -> tuple[str, Optional[tuple[str, str]]]: |
| 835 | + """Strip credentials from a proxy URL and return sanitized URL plus (user, pass). |
| 836 | +
|
| 837 | + Accepts inputs like: |
| 838 | + - username:password@host:port |
| 839 | + - http://username:password@host:port |
| 840 | + - socks5://username:password@host:port |
| 841 | + - host:port (no credentials) |
| 842 | + Returns a (sanitized_proxy, (user, pass) | None). |
| 843 | + Ensures scheme is present in the sanitized URL (defaults to http). |
| 844 | + """ |
| 845 | + base = proxy_server if '://' in proxy_server else f'http://{proxy_server}' |
| 846 | + parts = urlsplit(base) |
| 847 | + netloc = parts.netloc |
| 848 | + creds: Optional[tuple[str, str]] = None |
| 849 | + if '@' in netloc: |
| 850 | + cred_part, host_part = netloc.split('@', 1) |
| 851 | + if ':' in cred_part: |
| 852 | + user, pwd = cred_part.split(':', 1) |
| 853 | + else: |
| 854 | + user, pwd = cred_part, '' |
| 855 | + creds = (user, pwd) |
| 856 | + sanitized = urlunsplit(( |
| 857 | + parts.scheme, |
| 858 | + host_part, |
| 859 | + parts.path, |
| 860 | + parts.query, |
| 861 | + parts.fragment, |
| 862 | + )) |
| 863 | + else: |
| 864 | + # No creds; ensure scheme |
| 865 | + sanitized = urlunsplit(( |
| 866 | + parts.scheme, |
| 867 | + parts.netloc, |
| 868 | + parts.path, |
| 869 | + parts.query, |
| 870 | + parts.fragment, |
| 871 | + )) |
| 872 | + return sanitized, creds |
| 873 | + |
766 | 874 | @abstractmethod |
767 | 875 | def _get_default_binary_location(self) -> str: |
768 | 876 | """Get default browser executable path (implemented by subclasses).""" |
|
0 commit comments