11from __future__ import annotations
22
3+ import asyncio
34from typing import TYPE_CHECKING
45
5- from crawlee .storages import RequestQueue
6-
76from ._utils import generate_unique_resource_name
87from apify import Actor , Request
9- from apify .storage_clients import ApifyStorageClient
108
119if TYPE_CHECKING :
1210 from apify_client import ApifyClientAsync
11+ from crawlee .storages import RequestQueue
1312
1413 from .conftest import MakeActorFunction , RunActorFunction
1514
@@ -103,57 +102,83 @@ async def test_request_queue_is_finished() -> None:
103102 finally :
104103 await request_queue .drop ()
105104
106- # TODO, add more metadata tests
107105
108- async def test_request_queue_had_multiple_clients_localaaaa (
106+ async def test_request_queue_enhanced_metadata (
107+ request_queue_force_cloud : RequestQueue ,
109108 apify_client_async : ApifyClientAsync ,
110109) -> None :
111- """`RequestQueue` clients created with different `client_key` should appear as distinct clients."""
112- #request_queue_name = generate_unique_resource_name('request_queue')
113- rq_client = await ApifyStorageClient ().create_rq_client (name = None , id = None )
114- client_metadata = await rq_client .get_metadata ()
115- rq = RequestQueue (name = client_metadata .name , id = client_metadata .id , client = rq_client )
116- await rq .fetch_next_request ()
117- await rq .fetch_next_request ()
110+ """Test metadata tracking.
111+
112+ Multiple clients scenarios are not guaranteed to give correct results without delay. But at least multiple clients,
113+ single producer, should be reliable on the producer side."""
114+
115+ for i in range (1 , 10 ):
116+ await request_queue_force_cloud .add_request (Request .from_url (f'http://example.com/{ i } ' ))
117+ # Reliable information as the API response is enhanced with local metadata estimation.
118+ assert (await request_queue_force_cloud .get_metadata ()).total_request_count == i
119+
120+ # Accessed with client created explicitly with `client_key=None` should appear as distinct client
121+ api_client = apify_client_async .request_queue (request_queue_id = request_queue_force_cloud .id , client_key = None )
122+ await api_client .list_head ()
123+
124+ # The presence of another non-producing client should not affect the metadata
125+ for i in range (10 , 20 ):
126+ await request_queue_force_cloud .add_request (Request .from_url (f'http://example.com/{ i } ' ))
127+ # Reliable information as the API response is enhanced with local metadata estimation.
128+ assert (await request_queue_force_cloud .get_metadata ()).total_request_count == i
129+
130+
131+ async def test_request_queue_metadata_another_client (
132+ request_queue_force_cloud : RequestQueue ,
133+ apify_client_async : ApifyClientAsync ,
134+ ) -> None :
135+ """Test metadata tracking. The delayed metadata should be reliable even when changed by another client."""
136+ api_client = apify_client_async .request_queue (request_queue_id = request_queue_force_cloud .id , client_key = None )
137+ await api_client .add_request (Request .from_url ('http://example.com/1' ).model_dump (by_alias = True , exclude = {'id' }))
138+
139+ # Wait to be sure that the API has updated the global metadata
140+ await asyncio .sleep (10 )
141+
142+ assert (await request_queue_force_cloud .get_metadata ()).total_request_count == 1
118143
119- # Check that it is correctly in the RequestQueueClient metadata
120- assert (await rq .get_metadata ()).had_multiple_clients is False
121144
122145async def test_request_queue_had_multiple_clients_local (
146+ request_queue_force_cloud : RequestQueue ,
123147 apify_client_async : ApifyClientAsync ,
124148) -> None :
125- """`RequestQueue` clients created with different `client_key` should appear as distinct clients."""
126- request_queue_name = generate_unique_resource_name ('request_queue' )
149+ """Test that `RequestQueue` correctly detects multiple clients.
127150
128- async with Actor :
129- rq_1 = await Actor .open_request_queue (name = request_queue_name , force_cloud = True )
130- await rq_1 .fetch_next_request ()
151+ Clients created with different `client_key` should appear as distinct clients."""
152+ await request_queue_force_cloud .fetch_next_request ()
131153
132- # Accessed with client created explicitly with `client_key=None` should appear as distinct client
133- api_client = apify_client_async .request_queue (request_queue_id = rq_1 .id , client_key = None )
134- await api_client .list_head ()
154+ # Accessed with client created explicitly with `client_key=None` should appear as distinct client
155+ api_client = apify_client_async .request_queue (request_queue_id = request_queue_force_cloud .id , client_key = None )
156+ await api_client .list_head ()
135157
136- # Check that it is correctly in the RequestQueueClient metadata
137- assert (await rq_1 .get_metadata ()).had_multiple_clients is True # Currently broken
138- # Check that it is correctly in the API
139- assert ((await api_client .get ())['hadMultipleClients' ]) is True
158+ # Check that it is correctly in the RequestQueueClient metadata
159+ assert (await request_queue_force_cloud .get_metadata ()).had_multiple_clients is True
160+ # Check that it is correctly in the API
161+ api_response = await api_client .get ()
162+ assert api_response
163+ assert api_response ['hadMultipleClients' ] is True
140164
141165
142- async def test_request_queue_not_had_multiple_clients_local (apify_client_async : ApifyClientAsync ,) -> None :
166+ async def test_request_queue_not_had_multiple_clients_local (
167+ request_queue_force_cloud : RequestQueue , apify_client_async : ApifyClientAsync
168+ ) -> None :
143169 """Test that same `RequestQueue` created from Actor does not act as multiple clients."""
144- request_queue_name = generate_unique_resource_name ('request_queue' )
145170
146- async with Actor :
147- rq_1 = await Actor .open_request_queue (name = request_queue_name , force_cloud = True )
148- # Two calls to API to create situation where different `client_key` can set `had_multiple_clients` to True
149- await rq_1 .fetch_next_request ()
150- await rq_1 .fetch_next_request ()
171+ # Two calls to API to create situation where different `client_key` can set `had_multiple_clients` to True
172+ await request_queue_force_cloud .fetch_next_request ()
173+ await request_queue_force_cloud .fetch_next_request ()
151174
152- # Check that it is correctly in the RequestQueueClient metadata
153- assert (await rq_1 .get_metadata ()).had_multiple_clients is False
154- # Check that it is correctly in the API
155- api_client = apify_client_async .request_queue (request_queue_id = rq_1 .id )
156- assert ((await api_client .get ())['hadMultipleClients' ]) is False
175+ # Check that it is correctly in the RequestQueueClient metadata
176+ assert (await request_queue_force_cloud .get_metadata ()).had_multiple_clients is False
177+ # Check that it is correctly in the API
178+ api_client = apify_client_async .request_queue (request_queue_id = request_queue_force_cloud .id )
179+ api_response = await api_client .get ()
180+ assert api_response
181+ assert api_response ['hadMultipleClients' ] is False
157182
158183
159184async def test_request_queue_had_multiple_clients_platform (
@@ -175,11 +200,9 @@ async def main() -> None:
175200 await api_client .list_head ()
176201
177202 # Check that it is correctly in the RequestQueueClient metadata
178- assert (await rq_1 .get_metadata ()).had_multiple_clients is True # Currently broken
179- # Check that it is correctly in the API
180- assert ((await rq_1 ._client ._api_client .get ())['hadMultipleClients' ]) is True
203+ assert (await rq_1 .get_metadata ()).had_multiple_clients is True
181204
182- actor = await make_actor (label = 'rq-same-ref-default ' , main_func = main )
205+ actor = await make_actor (label = 'rq-had-multiple-clients ' , main_func = main )
183206 run_result = await run_actor (actor )
184207
185208 assert run_result .status == 'SUCCEEDED'
@@ -199,10 +222,8 @@ async def main() -> None:
199222
200223 # Check that it is correctly in the RequestQueueClient metadata
201224 assert (await rq_1 .get_metadata ()).had_multiple_clients is False
202- # Check that it is correctly in the API
203- assert ((await rq_1 ._client ._api_client .get ())['hadMultipleClients' ]) is False
204225
205- actor = await make_actor (label = 'rq-same-ref-default ' , main_func = main )
226+ actor = await make_actor (label = 'rq-not-had-multiple-clients ' , main_func = main )
206227 run_result = await run_actor (actor )
207228
208229 assert run_result .status == 'SUCCEEDED'
0 commit comments