Skip to content

Commit cac4f0f

Browse files
augeas authored and clemfromspace committed
Add SELENIUM_BROWSER_EXECUTABLE_PATH setting
Add the SELENIUM_BROWSER_EXECUTABLE_PATH setting to provide the path to the browser binary
1 parent 560ec91 commit cac4f0f

File tree

2 files changed

+17
-6
lines changed

2 files changed

+17
-6
lines changed

README.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,20 @@ You should use **python>=3.6**.
1111
You will also need one of the Selenium [compatible browsers](http://www.seleniumhq.org/about/platforms.jsp).
1212

1313
## Configuration
14-
1. Add the browser to use, the path to the executable, and the arguments to pass to the executable to the scrapy settings:
14+
1. Add the browser to use, the path to the driver executable, and the arguments to pass to the executable to the scrapy settings:
1515
```python
1616
from shutil import which
1717

18-
SELENIUM_DRIVER_NAME='firefox'
19-
SELENIUM_DRIVER_EXECUTABLE_PATH=which('geckodriver')
18+
SELENIUM_DRIVER_NAME = 'firefox'
19+
SELENIUM_DRIVER_EXECUTABLE_PATH = which('geckodriver')
2020
SELENIUM_DRIVER_ARGUMENTS=['-headless'] # '--headless' if using chrome instead of firefox
2121
```
2222

23+
Optionally, set the path to the browser executable:
24+
```python
25+
SELENIUM_BROWSER_EXECUTABLE_PATH = which('firefox')
26+
```
27+
2328
2. Add the `SeleniumMiddleware` to the downloader middlewares:
2429
```python
2530
DOWNLOADER_MIDDLEWARES = {

scrapy_selenium/middlewares.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
class SeleniumMiddleware:
1414
"""Scrapy middleware handling the requests using selenium"""
1515

16-
def __init__(self, driver_name, driver_executable_path, driver_arguments):
16+
def __init__(self, driver_name, driver_executable_path, driver_arguments,
17+
browser_executable_path):
1718
"""Initialize the selenium webdriver
1819
1920
Parameters
@@ -24,7 +25,8 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments):
2425
The path of the executable binary of the driver
2526
driver_arguments: list
2627
A list of arguments to initialize the driver
27-
28+
browser_executable_path: str
29+
The path of the executable binary of the browser
2830
"""
2931

3032
webdriver_base_path = f'selenium.webdriver.{driver_name}'
@@ -36,6 +38,8 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments):
3638
driver_options_klass = getattr(driver_options_module, 'Options')
3739

3840
driver_options = driver_options_klass()
41+
if browser_executable_path:
42+
driver_options.binary_location = browser_executable_path
3943
for argument in driver_arguments:
4044
driver_options.add_argument(argument)
4145

@@ -52,6 +56,7 @@ def from_crawler(cls, crawler):
5256

5357
driver_name = crawler.settings.get('SELENIUM_DRIVER_NAME')
5458
driver_executable_path = crawler.settings.get('SELENIUM_DRIVER_EXECUTABLE_PATH')
59+
browser_executable_path = crawler.settings.get('SELENIUM_BROWSER_EXECUTABLE_PATH')
5560
driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS')
5661

5762
if not driver_name or not driver_executable_path:
@@ -62,7 +67,8 @@ def from_crawler(cls, crawler):
6267
middleware = cls(
6368
driver_name=driver_name,
6469
driver_executable_path=driver_executable_path,
65-
driver_arguments=driver_arguments
70+
driver_arguments=driver_arguments,
71+
browser_executable_path=browser_executable_path
6672
)
6773

6874
crawler.signals.connect(middleware.spider_closed, signals.spider_closed)

0 commit comments

Comments
 (0)