File tree: 2 files changed, +27 −1 lines changed
src/crawlee/crawlers/_basic
tests/unit/crawlers/_basic
lines changed Original file line number Diff line number Diff line change @@ -659,7 +659,10 @@ async def run(
659659 request_manager = await self .get_request_manager ()
660660 if purge_request_queue and isinstance (request_manager , RequestQueue ):
661661 await request_manager .drop ()
662- self ._request_manager = await RequestQueue .open ()
662+ self ._request_manager = await RequestQueue .open (
663+ storage_client = self ._service_locator .get_storage_client (),
664+ configuration = self ._service_locator .get_configuration (),
665+ )
663666
664667 if requests is not None :
665668 await self .add_requests (requests )
Original file line number Diff line number Diff line change @@ -1620,3 +1620,26 @@ async def handler(context: BasicCrawlingContext) -> None:
16201620 )
16211621
16221622 await crawler .run (['https://start.placeholder.com' ])
1623+
1624+
async def test_crawler_purge_request_queue_uses_same_storage_client() -> None:
    """Purging the request queue on start must not swap the crawler's storage client.

    A file-system client is registered globally while the crawler is wired to an
    in-memory client; after purge-and-rerun the crawler must keep using its own
    client, and the globally-backed queue must remain untouched.
    """
    # Register a different storage client globally and seed an unrelated queue in it.
    service_locator.set_storage_client(FileSystemStorageClient())
    external_queue = await RequestQueue.open()
    external_request = Request.from_url('https://x.placeholder.com')
    await external_queue.add_request(external_request)

    # The crawler itself uses a distinct, in-memory storage client.
    crawler = BasicCrawler(storage_client=MemoryStorageClient())

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        context.log.info(context.request.url)

    # Run twice with purging enabled; each run should finish exactly one request,
    # which proves the queue was emptied and recreated between runs.
    for _ in range(2):
        await crawler.run(requests=[Request.from_url('https://a.placeholder.com')], purge_request_queue=True)
        assert crawler.statistics.state.requests_finished == 1

    # The globally-stored queue still holds its request: the crawler did not fall
    # back to the default (global) storage after the purge.
    assert await external_queue.fetch_next_request() == external_request
You can’t perform that action at this time.
0 commit comments