|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
| 3 | +import asyncio |
3 | 4 | from typing import TYPE_CHECKING
|
4 | 5 |
|
5 | 6 | import pytest
|
6 | 7 |
|
| 8 | +from crawlee import Request |
| 9 | + |
7 | 10 | from apify import Actor
|
8 | 11 |
|
9 | 12 | if TYPE_CHECKING:
|
| 13 | + from apify_client import ApifyClientAsync |
| 14 | + from crawlee.storages import RequestQueue |
| 15 | + |
10 | 16 | from .conftest import MakeActorFunction, RunActorFunction
|
11 | 17 |
|
12 | 18 |
|
@@ -1195,3 +1201,83 @@ async def consumer() -> int:
|
1195 | 1201 | actor = await make_actor(label='rq-performance-pattern-test', main_func=main)
|
1196 | 1202 | run_result = await run_actor(actor)
|
1197 | 1203 | assert run_result.status == 'SUCCEEDED'
|
| 1204 | + |
| 1205 | + |
async def test_request_queue_enhanced_metadata(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Verify that `total_request_count` follows every request added by a single producer.

    Scenarios with multiple clients are not guaranteed to give correct results without a delay. With multiple
    clients but only a single producer, the count observed on the producer side should still be reliable.
    """
    rq = request_queue_force_cloud

    async def add_and_check(sequence_number: int) -> None:
        # Add one request with a URL unique to this call, then compare the reported total with the running count.
        await rq.add_request(Request.from_url(f'http://example.com/{sequence_number}'))
        # The count is reliable because the API response is enhanced with the local metadata estimation.
        metadata = await rq.get_metadata()
        assert metadata.total_request_count == sequence_number

    for sequence_number in range(1, 10):
        await add_and_check(sequence_number)

    # A client created explicitly with `client_key=None` should appear as a distinct client.
    other_client = apify_client_async.request_queue(request_queue_id=rq.id, client_key=None)
    await other_client.list_head()

    # The presence of another client that does not produce requests should not affect the metadata.
    for sequence_number in range(10, 20):
        await add_and_check(sequence_number)
| 1229 | + |
| 1230 | + |
async def test_request_queue_metadata_another_client(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Verify that the metadata reflects, after a delay, a request added by another client."""
    metadata_propagation_delay_secs = 10

    # The request is added through a separate API client instead of through the `RequestQueue` under test.
    other_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
    request_payload = Request.from_url('http://example.com/1').model_dump(by_alias=True, exclude={'id'})
    await other_client.add_request(request_payload)

    # Give the API time to update the global metadata before reading it.
    await asyncio.sleep(metadata_propagation_delay_secs)

    metadata = await request_queue_force_cloud.get_metadata()
    assert metadata.total_request_count == 1
| 1243 | + |
| 1244 | + |
async def test_request_queue_had_multiple_clients(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Verify that `RequestQueue` detects access by multiple clients.

    Clients created with a different `client_key` should appear as distinct clients.
    """
    rq = request_queue_force_cloud
    await rq.fetch_next_request()

    # A client created explicitly with `client_key=None` should appear as a distinct client.
    other_client = apify_client_async.request_queue(request_queue_id=rq.id, client_key=None)
    await other_client.list_head()

    # The flag has to be set in the metadata reported by the `RequestQueue` itself ...
    metadata = await rq.get_metadata()
    assert metadata.had_multiple_clients is True

    # ... and also in the queue info returned directly by the API.
    queue_info = await other_client.get()
    assert queue_info
    assert queue_info['hadMultipleClients'] is True
| 1265 | + |
| 1266 | + |
async def test_request_queue_not_had_multiple_clients(
    request_queue_force_cloud: RequestQueue,
    apify_client_async: ApifyClientAsync,
) -> None:
    """Verify that a single `RequestQueue` created from the Actor does not act as multiple clients."""
    rq = request_queue_force_cloud

    # Two API calls create the situation in which differing `client_key` values could set `had_multiple_clients`.
    await rq.fetch_next_request()
    await rq.fetch_next_request()

    # The flag has to stay unset in the metadata reported by the `RequestQueue` itself ...
    metadata = await rq.get_metadata()
    assert metadata.had_multiple_clients is False

    # ... and also in the queue info returned directly by the API.
    queue_api_client = apify_client_async.request_queue(request_queue_id=rq.id)
    queue_info = await queue_api_client.get()
    assert queue_info
    assert queue_info['hadMultipleClients'] is False
0 commit comments