|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 | 3 | import asyncio
|
| 4 | +from datetime import datetime, timezone |
4 | 5 | from typing import TYPE_CHECKING
|
5 | 6 |
|
6 | 7 | import pytest
|
7 |
| -from apify_shared.consts import ApifyEnvVars |
8 | 8 |
|
9 |
| -from crawlee import Request |
| 9 | +from apify_shared.consts import ApifyEnvVars |
| 10 | +from crawlee import Request, service_locator |
10 | 11 |
|
11 |
| -from apify import Actor |
12 | 12 | from ._utils import generate_unique_resource_name
|
| 13 | +from apify import Actor |
13 | 14 |
|
14 | 15 | if TYPE_CHECKING:
|
15 | 16 | from apify_client import ApifyClientAsync
|
@@ -1072,29 +1073,47 @@ async def test_request_queue_not_had_multiple_clients(
|
1072 | 1073 | assert api_response['hadMultipleClients'] is False
|
1073 | 1074 |
|
1074 | 1075 |
|
1075 |
| -async def test_cache_initialization( |
1076 |
| - apify_token: str, monkeypatch: pytest.MonkeyPatch, apify_client_async: ApifyClientAsync |
1077 |
| -) -> None: |
1078 |
| - """Test that same `RequestQueue` created from Actor does not act as multiple clients.""" |
| 1076 | +async def test_cache_initialization(apify_token: str, monkeypatch: pytest.MonkeyPatch) -> None: |
| 1077 | + """Test that Apify based `RequestQueue` initializes cache correctly to reduce unnecessary API calls.""" |
1079 | 1078 |
|
1080 |
| - """Create an instance of the Apify request queue on the platform and drop it when the test is finished.""" |
| 1079 | + # Create an instance of the Apify request queue on the platform and drop it when the test is finished. |
1081 | 1080 | request_queue_name = generate_unique_resource_name('request_queue')
|
1082 | 1081 | monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_token)
|
1083 | 1082 |
|
| 1083 | + requests = [Request.from_url(f'http://example.com/{i}', handled_at=datetime.now(timezone.utc)) for i in range(10)] |
| 1084 | + |
1084 | 1085 | async with Actor:
|
1085 | 1086 | rq = await Actor.open_request_queue(name=request_queue_name, force_cloud=True)
|
1086 |
| - yield rq |
1087 |
| - await rq.drop() |
1088 |
| - |
1089 |
| - |
1090 |
| - await request_queue_force_cloud.fetch_next_request() |
1091 |
| - await request_queue_force_cloud.fetch_next_request() |
1092 |
| - |
1093 |
| - # Check that it is correctly in the RequestQueueClient metadata |
1094 |
| - assert (await request_queue_force_cloud.get_metadata()).had_multiple_clients is False |
1095 |
| - |
1096 |
| - # Check that it is correctly in the API |
1097 |
| - api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id) |
1098 |
| - api_response = await api_client.get() |
1099 |
| - assert api_response |
1100 |
| - assert api_response['hadMultipleClients'] is False |
| 1087 | + try: |
| 1088 | + await rq.add_requests(requests) |
| 1089 | + |
| 1090 | + # Check that it is correctly in the API |
| 1091 | + await asyncio.sleep(10) # Wait to be sure that metadata are updated |
| 1092 | + |
| 1093 | + # Get raw client, because stats are not exposed in `RequestQueue` class, but are available in raw client |
| 1094 | + rq_client = Actor.apify_client.request_queue(request_queue_id=rq.id) |
| 1095 | + _rq = await rq_client.get() |
| 1096 | + assert _rq |
| 1097 | + stats_before = _rq.get('stats', {}) |
| 1098 | + Actor.log.info(stats_before) |
| 1099 | + |
| 1100 | + # Clear service locator cache to simulate creating RQ instance from scratch |
| 1101 | + service_locator.storage_instance_manager.clear_cache() |
| 1102 | + |
| 1103 | + # Try to enqueue same requests again. It should be deduplicated from local cache created on initialization |
| 1104 | + rq = await Actor.open_request_queue(name=request_queue_name, force_cloud=True) |
| 1105 | + await rq.add_requests(requests) |
| 1106 | + |
| 1107 | + await asyncio.sleep(10) # Wait to be sure that metadata are updated |
| 1108 | + _rq = await rq_client.get() |
| 1109 | + assert _rq |
| 1110 | + stats_after = _rq.get('stats', {}) |
| 1111 | + Actor.log.info(stats_after) |
| 1112 | + |
| 1113 | + # Cache was actually initialized, readCount increased |
| 1114 | + assert (stats_after['readCount'] - stats_before['readCount']) == len(requests) |
| 1115 | + # Deduplication happened locally, writeCount should be the same |
| 1116 | + assert stats_after['writeCount'] == stats_before['writeCount'] |
| 1117 | + |
| 1118 | + finally: |
| 1119 | + await rq.drop() |
0 commit comments