Skip to content

Commit 301e294

Browse files
committed
Add more tests
1 parent b159d3e commit 301e294

File tree

3 files changed

+99
-86
lines changed

3 files changed

+99
-86
lines changed

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 19 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -68,52 +68,31 @@ def __init__(
6868
self._fetch_lock = asyncio.Lock()
6969
"""Fetch lock to minimize race conditions when communicating with API."""
7070

71-
@override
72-
async def get_metadata(self) -> RequestQueueMetadata:
73-
"""Get metadata about the request queue."""
71+
async def _get_metadata(self) -> RequestQueueMetadata:
72+
"""Try to get cached metadata first. If multiple clients, fuse with global metadata."""
7473
if self._metadata.had_multiple_clients:
75-
# Enhanced from API (can be delayed few seconds)
76-
response = await self._api_client.get()
77-
if response is None:
78-
raise ValueError('Failed to fetch request queue metadata from the API.')
79-
return RequestQueueMetadata(
80-
id=response['id'],
81-
name=response['name'],
82-
total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
83-
handled_request_count=max(response['handledRequestCount'], self._metadata.handled_request_count),
84-
pending_request_count=response['pendingRequestCount'],
85-
created_at=response['createdAt'],
86-
modified_at=max(response['modifiedAt'], self._metadata.modified_at),
87-
accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
88-
had_multiple_clients=response['hadMultipleClients'],
89-
)
90-
# Update local estimation?
74+
return await self.get_metadata()
9175
# Get local estimation (will not include changes done bo another client)
9276
return self._metadata
9377

94-
9578
@override
9679
async def get_metadata(self) -> RequestQueueMetadata:
9780
"""Get metadata about the request queue."""
98-
if self._metadata.had_multiple_clients:
99-
# Enhanced from API (can be delayed few seconds)
100-
response = await self._api_client.get()
101-
if response is None:
102-
raise ValueError('Failed to fetch request queue metadata from the API.')
103-
return RequestQueueMetadata(
104-
id=response['id'],
105-
name=response['name'],
106-
total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
107-
handled_request_count=max(response['handledRequestCount'], self._metadata.handled_request_count),
108-
pending_request_count=response['pendingRequestCount'],
109-
created_at=response['createdAt'],
110-
modified_at=max(response['modifiedAt'], self._metadata.modified_at),
111-
accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
112-
had_multiple_clients=response['hadMultipleClients'],
113-
)
114-
# Update local estimation?
115-
# Get local estimation (will not include changes done bo another client)
116-
return self._metadata
81+
response = await self._api_client.get()
82+
if response is None:
83+
raise ValueError('Failed to fetch request queue metadata from the API.')
84+
# Enhance API response by local estimations (API can be delayed few seconds, while local estimation not.)
85+
return RequestQueueMetadata(
86+
id=response['id'],
87+
name=response['name'],
88+
total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
89+
handled_request_count=max(response['handledRequestCount'], self._metadata.handled_request_count),
90+
pending_request_count=response['pendingRequestCount'],
91+
created_at=min(response['createdAt'], self._metadata.created_at),
92+
modified_at=max(response['modifiedAt'], self._metadata.modified_at),
93+
accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
94+
had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients,
95+
)
11796

11897
@classmethod
11998
async def open(
@@ -570,7 +549,7 @@ async def _list_head(
570549
if cached_request and cached_request.hydrated:
571550
items.append(cached_request.hydrated)
572551

573-
metadata = await self.get_metadata()
552+
metadata = await self._get_metadata()
574553

575554
return RequestQueueHead(
576555
limit=limit,

tests/integration/conftest.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818

1919
import apify._actor
2020
from ._utils import generate_unique_resource_name
21+
from apify import Actor
2122
from apify._models import ActorRun
2223

2324
if TYPE_CHECKING:
24-
from collections.abc import Awaitable, Callable, Coroutine, Iterator, Mapping
25+
from collections.abc import AsyncGenerator, Awaitable, Callable, Coroutine, Iterator, Mapping
2526
from decimal import Decimal
2627

2728
from apify_client.clients.resource_clients import ActorClientAsync
29+
from crawlee.storages import RequestQueue
2830

2931
_TOKEN_ENV_VAR = 'APIFY_TEST_USER_API_TOKEN'
3032
_API_URL_ENV_VAR = 'APIFY_INTEGRATION_TESTS_API_URL'
@@ -114,6 +116,17 @@ def apify_client_async(apify_token: str) -> ApifyClientAsync:
114116
return ApifyClientAsync(apify_token, api_url=api_url)
115117

116118

119+
@pytest.fixture
120+
async def request_queue_force_cloud() -> AsyncGenerator[RequestQueue]:
121+
"""Create an instance of the Apify request queue on the platform and drop it when the test is finished."""
122+
request_queue_name = generate_unique_resource_name('request_queue')
123+
124+
async with Actor:
125+
rq = await Actor.open_request_queue(name=request_queue_name, force_cloud=True)
126+
yield rq
127+
await rq.drop()
128+
129+
117130
@pytest.fixture(scope='session')
118131
def sdk_wheel_path(tmp_path_factory: pytest.TempPathFactory, testrun_uid: str) -> Path:
119132
"""Build the package wheel if it hasn't been built yet, and return the path to the wheel."""

tests/integration/test_actor_request_queue.py

Lines changed: 66 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
from __future__ import annotations
22

3+
import asyncio
34
from typing import TYPE_CHECKING
45

5-
from crawlee.storages import RequestQueue
6-
76
from ._utils import generate_unique_resource_name
87
from apify import Actor, Request
9-
from apify.storage_clients import ApifyStorageClient
108

119
if TYPE_CHECKING:
1210
from apify_client import ApifyClientAsync
11+
from crawlee.storages import RequestQueue
1312

1413
from .conftest import MakeActorFunction, RunActorFunction
1514

@@ -103,57 +102,83 @@ async def test_request_queue_is_finished() -> None:
103102
finally:
104103
await request_queue.drop()
105104

106-
# TODO, add more metadata tests
107105

108-
async def test_request_queue_had_multiple_clients_localaaaa(
106+
async def test_request_queue_enhanced_metadata(
107+
request_queue_force_cloud: RequestQueue,
109108
apify_client_async: ApifyClientAsync,
110109
) -> None:
111-
"""`RequestQueue` clients created with different `client_key` should appear as distinct clients."""
112-
#request_queue_name = generate_unique_resource_name('request_queue')
113-
rq_client = await ApifyStorageClient().create_rq_client(name=None, id=None)
114-
client_metadata = await rq_client.get_metadata()
115-
rq = RequestQueue(name=client_metadata.name, id=client_metadata.id, client=rq_client)
116-
await rq.fetch_next_request()
117-
await rq.fetch_next_request()
110+
"""Test metadata tracking.
111+
112+
Multiple clients scenarios are not guaranteed to give correct results without delay. But at least multiple clients,
113+
single producer, should be reliable on the producer side."""
114+
115+
for i in range(1, 10):
116+
await request_queue_force_cloud.add_request(Request.from_url(f'http://example.com/{i}'))
117+
# Reliable information as the API response is enhanced with local metadata estimation.
118+
assert (await request_queue_force_cloud.get_metadata()).total_request_count == i
119+
120+
# Accessed with client created explicitly with `client_key=None` should appear as distinct client
121+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
122+
await api_client.list_head()
123+
124+
# The presence of another non-producing client should not affect the metadata
125+
for i in range(10, 20):
126+
await request_queue_force_cloud.add_request(Request.from_url(f'http://example.com/{i}'))
127+
# Reliable information as the API response is enhanced with local metadata estimation.
128+
assert (await request_queue_force_cloud.get_metadata()).total_request_count == i
129+
130+
131+
async def test_request_queue_metadata_another_client(
132+
request_queue_force_cloud: RequestQueue,
133+
apify_client_async: ApifyClientAsync,
134+
) -> None:
135+
"""Test metadata tracking. The delayed metadata should be reliable even when changed by another client."""
136+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
137+
await api_client.add_request(Request.from_url('http://example.com/1').model_dump(by_alias=True, exclude={'id'}))
138+
139+
# Wait to be sure that the API has updated the global metadata
140+
await asyncio.sleep(10)
141+
142+
assert (await request_queue_force_cloud.get_metadata()).total_request_count == 1
118143

119-
# Check that it is correctly in the RequestQueueClient metadata
120-
assert (await rq.get_metadata()).had_multiple_clients is False
121144

122145
async def test_request_queue_had_multiple_clients_local(
146+
request_queue_force_cloud: RequestQueue,
123147
apify_client_async: ApifyClientAsync,
124148
) -> None:
125-
"""`RequestQueue` clients created with different `client_key` should appear as distinct clients."""
126-
request_queue_name = generate_unique_resource_name('request_queue')
149+
"""Test that `RequestQueue` correctly detects multiple clients.
127150
128-
async with Actor:
129-
rq_1 = await Actor.open_request_queue(name=request_queue_name, force_cloud=True)
130-
await rq_1.fetch_next_request()
151+
Clients created with different `client_key` should appear as distinct clients."""
152+
await request_queue_force_cloud.fetch_next_request()
131153

132-
# Accessed with client created explicitly with `client_key=None` should appear as distinct client
133-
api_client = apify_client_async.request_queue(request_queue_id=rq_1.id, client_key=None)
134-
await api_client.list_head()
154+
# Accessed with client created explicitly with `client_key=None` should appear as distinct client
155+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
156+
await api_client.list_head()
135157

136-
# Check that it is correctly in the RequestQueueClient metadata
137-
assert (await rq_1.get_metadata()).had_multiple_clients is True # Currently broken
138-
# Check that it is correctly in the API
139-
assert ((await api_client.get())['hadMultipleClients']) is True
158+
# Check that it is correctly in the RequestQueueClient metadata
159+
assert (await request_queue_force_cloud.get_metadata()).had_multiple_clients is True
160+
# Check that it is correctly in the API
161+
api_response = await api_client.get()
162+
assert api_response
163+
assert api_response['hadMultipleClients'] is True
140164

141165

142-
async def test_request_queue_not_had_multiple_clients_local(apify_client_async: ApifyClientAsync,) -> None:
166+
async def test_request_queue_not_had_multiple_clients_local(
167+
request_queue_force_cloud: RequestQueue, apify_client_async: ApifyClientAsync
168+
) -> None:
143169
"""Test that same `RequestQueue` created from Actor does not act as multiple clients."""
144-
request_queue_name = generate_unique_resource_name('request_queue')
145170

146-
async with Actor:
147-
rq_1 = await Actor.open_request_queue(name=request_queue_name, force_cloud=True)
148-
# Two calls to API to create situation where different `client_key` can set `had_multiple_clients` to True
149-
await rq_1.fetch_next_request()
150-
await rq_1.fetch_next_request()
171+
# Two calls to API to create situation where different `client_key` can set `had_multiple_clients` to True
172+
await request_queue_force_cloud.fetch_next_request()
173+
await request_queue_force_cloud.fetch_next_request()
151174

152-
# Check that it is correctly in the RequestQueueClient metadata
153-
assert (await rq_1.get_metadata()).had_multiple_clients is False
154-
# Check that it is correctly in the API
155-
api_client = apify_client_async.request_queue(request_queue_id=rq_1.id)
156-
assert ((await api_client.get())['hadMultipleClients']) is False
175+
# Check that it is correctly in the RequestQueueClient metadata
176+
assert (await request_queue_force_cloud.get_metadata()).had_multiple_clients is False
177+
# Check that it is correctly in the API
178+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id)
179+
api_response = await api_client.get()
180+
assert api_response
181+
assert api_response['hadMultipleClients'] is False
157182

158183

159184
async def test_request_queue_had_multiple_clients_platform(
@@ -175,11 +200,9 @@ async def main() -> None:
175200
await api_client.list_head()
176201

177202
# Check that it is correctly in the RequestQueueClient metadata
178-
assert (await rq_1.get_metadata()).had_multiple_clients is True # Currently broken
179-
# Check that it is correctly in the API
180-
assert ((await rq_1._client._api_client.get())['hadMultipleClients']) is True
203+
assert (await rq_1.get_metadata()).had_multiple_clients is True
181204

182-
actor = await make_actor(label='rq-same-ref-default', main_func=main)
205+
actor = await make_actor(label='rq-had-multiple-clients', main_func=main)
183206
run_result = await run_actor(actor)
184207

185208
assert run_result.status == 'SUCCEEDED'
@@ -199,10 +222,8 @@ async def main() -> None:
199222

200223
# Check that it is correctly in the RequestQueueClient metadata
201224
assert (await rq_1.get_metadata()).had_multiple_clients is False
202-
# Check that it is correctly in the API
203-
assert ((await rq_1._client._api_client.get())['hadMultipleClients']) is False
204225

205-
actor = await make_actor(label='rq-same-ref-default', main_func=main)
226+
actor = await make_actor(label='rq-not-had-multiple-clients', main_func=main)
206227
run_result = await run_actor(actor)
207228

208229
assert run_result.status == 'SUCCEEDED'

0 commit comments

Comments
 (0)