File tree Expand file tree Collapse file tree 2 files changed +27
-1
lines changed
src/crawlee/crawlers/_basic
tests/unit/crawlers/_basic Expand file tree Collapse file tree 2 files changed +27
-1
lines changed Original file line number Diff line number Diff line change @@ -659,7 +659,10 @@ async def run(
659
659
request_manager = await self .get_request_manager ()
660
660
if purge_request_queue and isinstance (request_manager , RequestQueue ):
661
661
await request_manager .drop ()
662
- self ._request_manager = await RequestQueue .open ()
662
+ self ._request_manager = await RequestQueue .open (
663
+ storage_client = self ._service_locator .get_storage_client (),
664
+ configuration = self ._service_locator .get_configuration (),
665
+ )
663
666
664
667
if requests is not None :
665
668
await self .add_requests (requests )
Original file line number Diff line number Diff line change @@ -1620,3 +1620,26 @@ async def handler(context: BasicCrawlingContext) -> None:
1620
1620
)
1621
1621
1622
1622
await crawler .run (['https://start.placeholder.com' ])
1623
+
1624
+
1625
async def test_crawler_purge_request_queue_uses_same_storage_client() -> None:
    """Purging on start must not swap out the storage client used by the crawler.

    A request queue opened against the globally registered storage client should
    remain untouched when a crawler configured with a *different* storage client
    drops and re-opens its own request queue during ``run(purge_request_queue=True)``.
    """
    # Register one storage client globally, and a different one on the crawler below.
    service_locator.set_storage_client(FileSystemStorageClient())
    external_queue = await RequestQueue.open()
    probe = Request.from_url('https://x.placeholder.com')
    await external_queue.add_request(probe)

    crawler = BasicCrawler(storage_client=MemoryStorageClient())

    @crawler.router.default_handler
    async def log_url(ctx: BasicCrawlingContext) -> None:
        ctx.log.info(ctx.request.url)

    # Run twice so the second run exercises the purge-and-reopen path.
    for _ in range(2):
        await crawler.run(requests=[Request.from_url('https://a.placeholder.com')], purge_request_queue=True)
        assert crawler.statistics.state.requests_finished == 1

    # Crawler should not fall back to the default storage after the purge:
    # if it had, this externally opened queue would have been dropped/emptied.
    assert await external_queue.fetch_next_request() == probe
You can’t perform that action at this time.
0 commit comments