1
1
from __future__ import annotations
2
2
3
+ import asyncio
3
4
from typing import TYPE_CHECKING
4
5
5
- from crawlee .storages import RequestQueue
6
-
7
6
from ._utils import generate_unique_resource_name
8
7
from apify import Actor , Request
9
- from apify .storage_clients import ApifyStorageClient
10
8
11
9
if TYPE_CHECKING :
12
10
from apify_client import ApifyClientAsync
11
+ from crawlee .storages import RequestQueue
13
12
14
13
from .conftest import MakeActorFunction , RunActorFunction
15
14
@@ -103,57 +102,83 @@ async def test_request_queue_is_finished() -> None:
103
102
finally :
104
103
await request_queue .drop ()
105
104
106
- # TODO, add more metadata tests
107
105
108
- async def test_request_queue_had_multiple_clients_localaaaa (
106
async def test_request_queue_enhanced_metadata(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Check that `total_request_count` stays accurate on the producer side.

    Requests are added one at a time and, after every addition, the metadata reported by the queue must equal
    the number of requests added so far. Between the two batches a second API client (created explicitly with
    `client_key=None`) reads the queue head.

    Multi-client scenarios are not guaranteed to give correct results without a delay, but with a single
    producer the producer-side numbers should be reliable.
    """

    async def add_and_verify(index: int) -> None:
        # Add the `index`-th request and verify the total immediately afterwards.
        await request_queue_force_cloud.add_request(Request.from_url(f'http://example.com/{index}'))
        # The value is reliable because the API response is enhanced with a local metadata estimation.
        metadata = await request_queue_force_cloud.get_metadata()
        assert metadata.total_request_count == index

    # First batch: requests 1..9, produced while this queue object is the only client.
    for index in range(1, 10):
        await add_and_verify(index)

    # A client created explicitly with `client_key=None` should appear as a distinct client.
    other_client = apify_client_async.request_queue(
        request_queue_id=request_queue_force_cloud.id,
        client_key=None,
    )
    await other_client.list_head()

    # Second batch: requests 10..19. The presence of another, non-producing client should not affect the metadata.
    for index in range(10, 20):
        await add_and_verify(index)
129
+
130
+
131
async def test_request_queue_metadata_another_client(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Check that delayed metadata reflects a request added by a different client.

    A request is added through a separate API client (created with `client_key=None`); after a pause, the
    `total_request_count` reported by `request_queue_force_cloud` must be 1.
    """
    other_client = apify_client_async.request_queue(
        request_queue_id=request_queue_force_cloud.id,
        client_key=None,
    )
    # The raw API client takes a plain dict, so the `Request` model is dumped by alias without its `id`.
    request_payload = Request.from_url('http://example.com/1').model_dump(by_alias=True, exclude={'id'})
    await other_client.add_request(request_payload)

    # Wait to be sure that the API has updated the global metadata.
    await asyncio.sleep(10)

    metadata = await request_queue_force_cloud.get_metadata()
    assert metadata.total_request_count == 1
118
143
119
- # Check that it is correctly in the RequestQueueClient metadata
120
- assert (await rq .get_metadata ()).had_multiple_clients is False
121
144
122
145
async def test_request_queue_had_multiple_clients_local(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Check that `RequestQueue` detects access by more than one client.

    The queue is touched once through `request_queue_force_cloud` and once through an API client created with
    `client_key=None`; clients with different `client_key` values should be seen as distinct clients.
    """
    # First access, made through the queue object under test.
    await request_queue_force_cloud.fetch_next_request()

    # Second access, through a client created explicitly with `client_key=None`, which should appear
    # as a distinct client.
    other_client = apify_client_async.request_queue(
        request_queue_id=request_queue_force_cloud.id,
        client_key=None,
    )
    await other_client.list_head()

    # The flag must be set in the RequestQueueClient metadata...
    queue_metadata = await request_queue_force_cloud.get_metadata()
    assert queue_metadata.had_multiple_clients is True

    # ...and in the raw API response as well.
    api_response = await other_client.get()
    assert api_response
    assert api_response['hadMultipleClients'] is True
140
164
141
165
142
- async def test_request_queue_not_had_multiple_clients_local (apify_client_async : ApifyClientAsync ,) -> None :
166
async def test_request_queue_not_had_multiple_clients_local(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Check that repeated access through the same `RequestQueue` is not reported as multiple clients."""
    # Two calls to the API create the situation in which a different `client_key` could set
    # `had_multiple_clients` to True.
    for _ in range(2):
        await request_queue_force_cloud.fetch_next_request()

    # The flag must stay unset in the RequestQueueClient metadata...
    queue_metadata = await request_queue_force_cloud.get_metadata()
    assert queue_metadata.had_multiple_clients is False

    # ...and in the raw API response as well.
    api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id)
    api_response = await api_client.get()
    assert api_response
    assert api_response['hadMultipleClients'] is False
157
182
158
183
159
184
async def test_request_queue_had_multiple_clients_platform (
@@ -175,11 +200,9 @@ async def main() -> None:
175
200
await api_client .list_head ()
176
201
177
202
# Check that it is correctly in the RequestQueueClient metadata
178
- assert (await rq_1 .get_metadata ()).had_multiple_clients is True # Currently broken
179
- # Check that it is correctly in the API
180
- assert ((await rq_1 ._client ._api_client .get ())['hadMultipleClients' ]) is True
203
+ assert (await rq_1 .get_metadata ()).had_multiple_clients is True
181
204
182
- actor = await make_actor (label = 'rq-same-ref-default ' , main_func = main )
205
+ actor = await make_actor (label = 'rq-had-multiple-clients ' , main_func = main )
183
206
run_result = await run_actor (actor )
184
207
185
208
assert run_result .status == 'SUCCEEDED'
@@ -199,10 +222,8 @@ async def main() -> None:
199
222
200
223
# Check that it is correctly in the RequestQueueClient metadata
201
224
assert (await rq_1 .get_metadata ()).had_multiple_clients is False
202
- # Check that it is correctly in the API
203
- assert ((await rq_1 ._client ._api_client .get ())['hadMultipleClients' ]) is False
204
225
205
- actor = await make_actor (label = 'rq-same-ref-default ' , main_func = main )
226
+ actor = await make_actor (label = 'rq-not-had-multiple-clients ' , main_func = main )
206
227
run_result = await run_actor (actor )
207
228
208
229
assert run_result .status == 'SUCCEEDED'
0 commit comments