Skip to content

Commit dbe85b9

Browse files
authored
feat: Expose browser_options and page_options to PlaywrightCrawler (#730)
Closes: #719
1 parent 78ae1a8 commit dbe85b9

File tree

3 files changed

+26
-8
lines changed

3 files changed

+26
-8
lines changed

src/crawlee/browsers/_browser_pool.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,16 +97,22 @@ def with_default_plugin(
9797
*,
9898
headless: bool | None = None,
9999
browser_type: BrowserType | None = None,
100+
browser_options: Mapping[str, Any] | None = None,
101+
page_options: Mapping[str, Any] | None = None,
100102
**kwargs: Any,
101103
) -> BrowserPool:
102-
"""Create a new instance with a single `BaseBrowserPlugin` configured with the provided options.
104+
"""Create a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.
103105
104106
Args:
105107
headless: Whether to run the browser in headless mode.
106108
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
109+
browser_options: Keyword arguments to pass to the browser launch method.
110+
page_options: Keyword arguments to pass to the new page method.
107111
kwargs: Additional arguments for default constructor.
108112
"""
109113
plugin_options: dict = defaultdict(dict)
114+
plugin_options['browser_options'] = browser_options or {}
115+
plugin_options['page_options'] = page_options or {}
110116

111117
if headless is not None:
112118
plugin_options['browser_options']['headless'] = headless

src/crawlee/browsers/_playwright_browser_plugin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def __init__(
4242
4343
Args:
4444
browser_type: The type of the browser to launch.
45-
browser_options: Options to configure the browser instance.
46-
page_options: Options to configure a new page instance.
45+
browser_options: Keyword arguments to pass to the browser launch method.
46+
page_options: Keyword arguments to pass to the new page method.
4747
max_open_pages_per_browser: The maximum number of pages that can be opened in a single browser instance.
4848
Once reached, a new browser instance will be launched to handle the excess.
4949
"""

src/crawlee/playwright_crawler/_playwright_crawler.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import logging
4-
from typing import TYPE_CHECKING, Awaitable, Callable
4+
from typing import TYPE_CHECKING, Any, Awaitable, Callable, Mapping
55

66
from pydantic import ValidationError
77

@@ -71,6 +71,8 @@ def __init__(
7171
self,
7272
browser_pool: BrowserPool | None = None,
7373
browser_type: BrowserType | None = None,
74+
browser_options: Mapping[str, Any] | None = None,
75+
page_options: Mapping[str, Any] | None = None,
7476
headless: bool | None = None,
7577
**kwargs: Unpack[BasicCrawlerOptions[PlaywrightCrawlingContext]],
7678
) -> None:
@@ -80,20 +82,30 @@ def __init__(
8082
browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
8183
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
8284
This option should not be used if `browser_pool` is provided.
85+
browser_options: Keyword arguments to pass to the browser launch method.
86+
This option should not be used if `browser_pool` is provided.
87+
page_options: Keyword arguments to pass to the new page method.
88+
This option should not be used if `browser_pool` is provided.
8389
headless: Whether to run the browser in headless mode.
8490
This option should not be used if `browser_pool` is provided.
8591
kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
8692
"""
8793
if browser_pool:
88-
# Raise an exception if browser_pool is provided together with headless or browser_type arguments.
89-
if headless is not None or browser_type is not None:
94+
# Raise an exception if browser_pool is provided together with other browser-related arguments.
95+
if any(param is not None for param in (headless, browser_type, browser_options, page_options)):
9096
raise ValueError(
91-
'You cannot provide `headless` or `browser_type` arguments when `browser_pool` is provided.'
97+
'You cannot provide `headless`, `browser_type`, `browser_options` or `page_options` '
98+
'arguments when `browser_pool` is provided.'
9299
)
93100

94101
# If browser_pool is not provided, create a new instance of BrowserPool with specified arguments.
95102
else:
96-
browser_pool = BrowserPool.with_default_plugin(headless=headless, browser_type=browser_type)
103+
browser_pool = BrowserPool.with_default_plugin(
104+
headless=headless,
105+
browser_type=browser_type,
106+
browser_options=browser_options,
107+
page_options=page_options,
108+
)
97109

98110
self._browser_pool = browser_pool
99111

0 commit comments

Comments
 (0)