@@ -163,18 +163,28 @@ async def test_request_queue_parallel_deduplication(
163
163
make_actor : MakeActorFunction ,
164
164
run_actor : RunActorFunction ,
165
165
) -> None :
166
- """Test that the deduplication works correctly even with parallel attempts to add same links."""
166
+ """Test that the deduplication works correctly even with parallel attempts to add same links.
167
+
168
+ The test is set up so that each worker has some requests that were already added to the queue and some new
169
+ requests. The function must correctly deduplicate the requests and add only the new ones. For example:
170
+ the first worker adds 10 new requests,
171
+ the second worker adds 10 new requests and 10 known requests,
172
+ the third worker adds 10 new requests and 20 known requests, and so on."""
167
173
168
174
async def main () -> None :
169
175
import asyncio
170
176
import logging
171
177
172
178
from apify import Actor , Request
173
179
180
+ worker_count = 10
181
+ max_requests = 100
182
+ batch_size = iter (range (10 , max_requests + 1 , int (max_requests / worker_count )))
183
+
174
184
async with Actor :
175
185
logging .getLogger ('apify.storage_clients._apify._request_queue_client' ).setLevel (logging .DEBUG )
176
186
177
- requests = [Request .from_url (f'http://example.com/{ i } ' ) for i in range (100 )]
187
+ requests = [Request .from_url (f'http://example.com/{ i } ' ) for i in range (max_requests )]
178
188
rq = await Actor .open_request_queue ()
179
189
180
190
await asyncio .sleep (10 ) # Wait to be sure that metadata are updated
@@ -186,11 +196,12 @@ async def main() -> None:
186
196
stats_before = _rq .get ('stats' , {})
187
197
Actor .log .info (stats_before )
188
198
189
- # Add same requests in 10 parallel workers
199
+ # Each worker adds a batch mixing new and already-present requests
190
200
async def add_requests_worker () -> None :
191
- await rq .add_requests (requests )
201
+ await rq .add_requests (requests [: next ( batch_size )] )
192
202
193
- add_requests_workers = [asyncio .create_task (add_requests_worker ()) for _ in range (10 )]
203
+ # Start all workers
204
+ add_requests_workers = [asyncio .create_task (add_requests_worker ()) for _ in range (worker_count )]
194
205
await asyncio .gather (* add_requests_workers )
195
206
196
207
await asyncio .sleep (10 ) # Wait to be sure that metadata are updated
0 commit comments