2121 Response as PlaywrightResponse ,
2222 Route ,
2323)
24- from scrapy import Spider , signals
25- from scrapy .core .downloader .handlers .http import HTTPDownloadHandler
24+ from scrapy import Spider , signals , version_info as scrapy_version_info
25+ from scrapy .core .downloader .handlers .http11 import HTTP11DownloadHandler
2626from scrapy .crawler import Crawler
2727from scrapy .exceptions import NotSupported
2828from scrapy .http import Request , Response
__all__ = ["ScrapyPlaywrightDownloadHandler"]


# True when the installed Scrapy is 2.14.0 or newer. The handler class checks
# this flag while its body is being defined to pick between coroutine-based and
# Deferred-based variants of ``close`` and ``download_request``, and to choose
# the arguments passed to the parent constructor.
_SCRAPY_ASYNC_API = scrapy_version_info >= (2, 14, 0)


# Type variable bound to the download handler class.
PlaywrightHandler = TypeVar("PlaywrightHandler", bound="ScrapyPlaywrightDownloadHandler")
5457
5558
@@ -131,20 +134,29 @@ def from_settings(cls, settings: Settings) -> "Config":
131134 return cfg
132135
133136
134- class ScrapyPlaywrightDownloadHandler (HTTPDownloadHandler ):
137+ class ScrapyPlaywrightDownloadHandler (HTTP11DownloadHandler ):
135138 playwright_context_manager : Optional [PlaywrightContextManager ] = None
136139 playwright : Optional [AsyncPlaywright ] = None
137140
138141 def __init__ (self , crawler : Crawler ) -> None :
139- super ().__init__ (settings = crawler .settings , crawler = crawler )
140142 verify_installed_reactor ("twisted.internet.asyncioreactor.AsyncioSelectorReactor" )
141- crawler .signals .connect (self ._engine_started , signals .engine_started )
143+ if _SCRAPY_ASYNC_API :
144+ super ().__init__ (crawler = crawler )
145+ else :
146+ super ().__init__ ( # pylint: disable=unexpected-keyword-arg
147+ settings = crawler .settings , crawler = crawler
148+ )
142149 self .stats = crawler .stats
143150 self .config = Config .from_settings (crawler .settings )
144151
145152 if self .config .use_threaded_loop :
146153 _ThreadedLoopAdapter .start (id (self ))
147154
155+ if _SCRAPY_ASYNC_API :
156+ crawler .signals .connect (self ._maybe_launch_in_thread , signals .engine_started )
157+ else :
158+ crawler .signals .connect (self ._engine_started , signals .engine_started )
159+
148160 self .browser_launch_lock = asyncio .Lock ()
149161 self .context_launch_lock = asyncio .Lock ()
150162 self .context_wrappers : Dict [str , BrowserContextWrapper ] = {}
@@ -175,10 +187,17 @@ def _deferred_from_coro(self, coro: Awaitable) -> Deferred:
175187 return _ThreadedLoopAdapter ._deferred_from_coro (coro )
176188 return deferred_from_coro (coro )
177189
190+ def _maybe_future_from_coro (self , coro : Awaitable ) -> Awaitable | asyncio .Future :
191+ if self .config .use_threaded_loop :
192+ return _ThreadedLoopAdapter ._future_from_coro (coro )
193+ return coro
194+
178195 def _engine_started (self ) -> Deferred :
179- """Launch the browser. Use the engine_started signal as it supports returning deferreds."""
180196 return self ._deferred_from_coro (self ._launch ())
181197
198+ async def _maybe_launch_in_thread (self ) -> None :
199+ await self ._maybe_future_from_coro (self ._launch ())
200+
182201 async def _launch (self ) -> None :
183202 """Launch Playwright manager and configured startup context(s)."""
184203 logger .info ("Starting download handler" )
@@ -346,13 +365,24 @@ def _set_max_concurrent_context_count(self):
346365 "playwright/context_count/max_concurrent" , len (self .context_wrappers )
347366 )
348367
349- @inlineCallbacks
350- def close (self ) -> Deferred :
351- logger .info ("Closing download handler" )
352- yield super ().close ()
353- yield self ._deferred_from_coro (self ._close ())
354- if self .config .use_threaded_loop :
355- _ThreadedLoopAdapter .stop (id (self ))
if _SCRAPY_ASYNC_API:

    async def close(self) -> None:
        """Close the parent handler, then Playwright, then stop the threaded loop.

        The threaded loop (when enabled) is stopped in a ``finally`` clause so
        that an exception raised by either close step cannot leave the loop
        thread running. Any such exception still propagates to the caller.
        """
        logger.info("Closing download handler")
        try:
            await super().close()
            await self._maybe_future_from_coro(self._close())
        finally:
            if self.config.use_threaded_loop:
                _ThreadedLoopAdapter.stop(id(self))

else:

    @inlineCallbacks
    def close(self) -> Deferred:  # pylint: disable=invalid-overridden-method
        """Deferred-based variant of ``close`` with the same ordering.

        The threaded loop (when enabled) is stopped in a ``finally`` clause so
        that a failure in either close step cannot leave the loop thread
        running. The ``finally`` clause itself never yields.
        """
        logger.info("Closing download handler")
        try:
            yield super().close()
            yield self._deferred_from_coro(self._close())
        finally:
            if self.config.use_threaded_loop:
                _ThreadedLoopAdapter.stop(id(self))
356386
357387 async def _close (self ) -> None :
358388 with suppress (TargetClosedError ):
@@ -366,12 +396,29 @@ async def _close(self) -> None:
366396 if self .playwright :
367397 await self .playwright .stop ()
368398
369- def download_request (self , request : Request , spider : Spider ) -> Deferred :
370- if request .meta .get ("playwright" ):
371- return self ._deferred_from_coro (self ._download_request (request , spider ))
372- return super ().download_request (request , spider )
if _SCRAPY_ASYNC_API:

    async def download_request(self, request: Request) -> Response:
        """Download ``request`` with Playwright when its meta asks for it.

        Requests without a truthy ``"playwright"`` meta value are delegated to
        the parent handler. All others go through ``_download_request``, routed
        via ``_maybe_future_from_coro``.
        """
        if not request.meta.get("playwright"):
            return await super().download_request(  # pylint: disable=no-value-for-parameter
                request
            )
        playwright_coro = self._download_request(request)
        return await self._maybe_future_from_coro(playwright_coro)

else:

    def download_request(  # type: ignore[misc]  # pylint: disable=invalid-overridden-method,arguments-differ  # noqa: E501
        self, request: Request, spider: Spider
    ) -> Deferred:
        """Deferred-based variant that also receives the spider.

        Requests without a truthy ``"playwright"`` meta value are delegated to
        the parent handler. All others go through ``_download_request``,
        wrapped by ``_deferred_from_coro``.
        """
        if not request.meta.get("playwright"):
            return super().download_request(  # pylint: disable=unexpected-keyword-arg
                request=request, spider=spider
            )
        playwright_coro = self._download_request(request, spider)
        return self._deferred_from_coro(playwright_coro)
373419
374- async def _download_request (self , request : Request , spider : Spider ) -> Response :
420+ async def _download_request (self , request : Request , spider : Spider | None = None ) -> Response :
421+ spider = spider or self ._crawler .spider
375422 counter = 0
376423 while True :
377424 try :
@@ -562,8 +609,7 @@ async def _handle_response(response: PlaywrightResponse) -> None:
562609 response = await page .goto (url = request .url , ** page_goto_kwargs )
563610 except PlaywrightError as err :
564611 if not (
565- self .config .browser_type_name in ("firefox" , "webkit" )
566- and "Download is starting" in err .message
612+ "Download is starting" in err .message
567613 or self .config .browser_type_name == "chromium"
568614 and "net::ERR_ABORTED" in err .message
569615 ):
0 commit comments