2323 Response as PlaywrightResponse ,
2424 Route ,
2525)
26- from scrapy import Spider , signals
26+ from scrapy import Spider , signals , version_info as scrapy_version_info
2727from scrapy .core .downloader .handlers .http11 import HTTP11DownloadHandler
2828from scrapy .crawler import Crawler
2929from scrapy .exceptions import NotSupported , ScrapyDeprecationWarning
3030from scrapy .http import Request , Response
3131from scrapy .http .headers import Headers
3232from scrapy .responsetypes import responsetypes
3333from scrapy .settings import Settings
34- from scrapy .utils .defer import deferred_from_coro
34+ from scrapy .utils .defer import deferred_from_coro , maybe_deferred_to_future
3535from scrapy .utils .misc import load_object
3636from scrapy .utils .reactor import verify_installed_reactor
3737from twisted .internet .defer import Deferred , inlineCallbacks
5252__all__ = ["ScrapyPlaywrightDownloadHandler" ]
5353
5454
55+ _SCRAPY_ASYNC_API = scrapy_version_info >= (2 , 14 , 0 )
56+
57+
5558PlaywrightHandler = TypeVar ("PlaywrightHandler" , bound = "ScrapyPlaywrightDownloadHandler" )
5659
5760
@@ -138,7 +141,12 @@ class ScrapyPlaywrightDownloadHandler(HTTP11DownloadHandler):
138141 playwright : Optional [AsyncPlaywright ] = None
139142
140143 def __init__ (self , crawler : Crawler ) -> None :
141- super ().__init__ (settings = crawler .settings , crawler = crawler )
144+ if _SCRAPY_ASYNC_API :
145+ super ().__init__ (crawler = crawler )
146+ else :
147+ super ().__init__ ( # pylint: disable=unexpected-keyword-arg
148+ settings = crawler .settings , crawler = crawler
149+ )
142150 verify_installed_reactor ("twisted.internet.asyncioreactor.AsyncioSelectorReactor" )
143151 crawler .signals .connect (self ._engine_started , signals .engine_started )
144152 self .stats = crawler .stats
@@ -348,13 +356,20 @@ def _set_max_concurrent_context_count(self):
348356 "playwright/context_count/max_concurrent" , len (self .context_wrappers )
349357 )
350358
351- @inlineCallbacks
352- def close (self ) -> Deferred :
353- logger .info ("Closing download handler" )
354- yield super ().close ()
355- yield self ._deferred_from_coro (self ._close ())
356- if self .config .use_threaded_loop :
357- _ThreadedLoopAdapter .stop (id (self ))
359+ if _SCRAPY_ASYNC_API :
360+
361+ async def close (self ) -> None :
362+ logger .info ("Closing download handler" )
363+ await super ().close ()
364+ await self ._close ()
365+
366+ else :
367+
368+ @inlineCallbacks
369+ def close (self ) -> Deferred : # pylint: disable=invalid-overridden-method
370+ logger .info ("Closing download handler" )
371+ yield super ().close ()
372+ yield self ._deferred_from_coro (self ._close ())
358373
359374 async def _close (self ) -> None :
360375 with suppress (TargetClosedError ):
@@ -367,11 +382,30 @@ async def _close(self) -> None:
367382 await self .playwright_context_manager .__aexit__ ()
368383 if self .playwright :
369384 await self .playwright .stop ()
385+ if self .config .use_threaded_loop :
386+ _ThreadedLoopAdapter .stop (id (self ))
387+
388+ if _SCRAPY_ASYNC_API :
389+
390+ async def download_request (self , request : Request ) -> Response :
391+ if request .meta .get ("playwright" ):
392+ return await maybe_deferred_to_future (
393+ self ._deferred_from_coro (self ._download_request (request , self ._crawler .spider ))
394+ )
395+ return await super ().download_request ( # pylint: disable=no-value-for-parameter
396+ request
397+ )
370398
371- def download_request (self , request : Request , spider : Spider ) -> Deferred :
372- if request .meta .get ("playwright" ):
373- return self ._deferred_from_coro (self ._download_request (request , spider ))
374- return super ().download_request (request , spider )
399+ else :
400+
401+ def download_request ( # type: ignore[misc] # pylint: disable=invalid-overridden-method,arguments-differ # noqa: E501
402+ self , request : Request , spider : Spider
403+ ) -> Deferred :
404+ if request .meta .get ("playwright" ):
405+ return self ._deferred_from_coro (self ._download_request (request , spider ))
406+ return super ().download_request ( # pylint: disable=unexpected-keyword-arg
407+ request = request , spider = spider
408+ )
375409
376410 async def _download_request (self , request : Request , spider : Spider ) -> Response :
377411 counter = 0
@@ -564,8 +598,7 @@ async def _handle_response(response: PlaywrightResponse) -> None:
564598 response = await page .goto (url = request .url , ** page_goto_kwargs )
565599 except PlaywrightError as err :
566600 if not (
567- self .config .browser_type_name in ("firefox" , "webkit" )
568- and "Download is starting" in err .message
601+ "Download is starting" in err .message
569602 or self .config .browser_type_name == "chromium"
570603 and "net::ERR_ABORTED" in err .message
571604 ):
0 commit comments