From b9df4beb4e76610cc5d54dd59b8c12aef368b6bc Mon Sep 17 00:00:00 2001 From: Mindey Date: Thu, 23 Dec 2021 02:05:07 +0000 Subject: [PATCH 1/2] add SELENIUM_EXPERIMENTAL_OPTIONS parameter to allow passing extra options --- scrapy_selenium/middlewares.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scrapy_selenium/middlewares.py b/scrapy_selenium/middlewares.py index 201db2c..bbf48fe 100644 --- a/scrapy_selenium/middlewares.py +++ b/scrapy_selenium/middlewares.py @@ -14,7 +14,7 @@ class SeleniumMiddleware: """Scrapy middleware handling the requests using selenium""" def __init__(self, driver_name, driver_executable_path, - browser_executable_path, command_executor, driver_arguments): + browser_executable_path, command_executor, driver_arguments, experimental_options): """Initialize the selenium webdriver Parameters @@ -45,6 +45,8 @@ def __init__(self, driver_name, driver_executable_path, driver_options.binary_location = browser_executable_path for argument in driver_arguments: driver_options.add_argument(argument) + for option, value in experimental_options.items(): + driver_options.add_experimental_option(option, value) driver_kwargs = { 'executable_path': driver_executable_path, @@ -74,6 +76,7 @@ def from_crawler(cls, crawler): browser_executable_path = crawler.settings.get('SELENIUM_BROWSER_EXECUTABLE_PATH') command_executor = crawler.settings.get('SELENIUM_COMMAND_EXECUTOR') driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS') + driver_experimental_options = crawler.settings.get('SELENIUM_EXPERIMENTAL_OPTIONS', {}) if driver_name is None: raise NotConfigured('SELENIUM_DRIVER_NAME must be set') @@ -87,7 +90,8 @@ def from_crawler(cls, crawler): driver_executable_path=driver_executable_path, browser_executable_path=browser_executable_path, command_executor=command_executor, - driver_arguments=driver_arguments + driver_arguments=driver_arguments, + experimental_options=driver_experimental_options ) 
crawler.signals.connect(middleware.spider_closed, signals.spider_closed) From a546d610551bffd3b8b79320c50c02f316eab6cf Mon Sep 17 00:00:00 2001 From: Mindey Date: Thu, 23 Dec 2021 03:33:57 +0000 Subject: [PATCH 2/2] update: also optionally include the browser extensions --- scrapy_selenium/middlewares.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scrapy_selenium/middlewares.py b/scrapy_selenium/middlewares.py index bbf48fe..736ca91 100644 --- a/scrapy_selenium/middlewares.py +++ b/scrapy_selenium/middlewares.py @@ -14,7 +14,7 @@ class SeleniumMiddleware: """Scrapy middleware handling the requests using selenium""" def __init__(self, driver_name, driver_executable_path, - browser_executable_path, command_executor, driver_arguments, experimental_options): + browser_executable_path, command_executor, driver_arguments, experimental_options, extensions): """Initialize the selenium webdriver Parameters @@ -47,6 +47,8 @@ def __init__(self, driver_name, driver_executable_path, driver_options.add_argument(argument) for option, value in experimental_options.items(): driver_options.add_experimental_option(option, value) + for extension in extensions: + driver_options.add_extension(extension) driver_kwargs = { 'executable_path': driver_executable_path, @@ -77,6 +79,7 @@ def from_crawler(cls, crawler): command_executor = crawler.settings.get('SELENIUM_COMMAND_EXECUTOR') driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS') driver_experimental_options = crawler.settings.get('SELENIUM_EXPERIMENTAL_OPTIONS', {}) + driver_extensions = crawler.settings.get('SELENIUM_EXTENSIONS', []) if driver_name is None: raise NotConfigured('SELENIUM_DRIVER_NAME must be set') @@ -91,7 +94,8 @@ def from_crawler(cls, crawler): browser_executable_path=browser_executable_path, command_executor=command_executor, driver_arguments=driver_arguments, - experimental_options=driver_experimental_options + experimental_options=driver_experimental_options, + 
extensions=driver_extensions ) crawler.signals.connect(middleware.spider_closed, signals.spider_closed)