Skip to content

Commit 5763b48

Browse files
committed
Review comments
1 parent 31ca22e commit 5763b48

File tree

4 files changed

+94
-87
lines changed

4 files changed

+94
-87
lines changed

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,12 @@ def __init__(
6969
"""Fetch lock to minimize race conditions when communicating with API."""
7070

7171
async def _get_metadata(self) -> RequestQueueMetadata:
72-
"""Try to get cached metadata first. If multiple clients, fuse with global metadata."""
72+
"""Try to get cached metadata first. If multiple clients, fuse with global metadata.
73+
74+
This method is used internally to avoid unnecessary API call unless needed (multiple clients).
75+
Local estimation of metadata is without delay, unlike metadata from API. In situation where there is only one
76+
client, it is the better choice.
77+
"""
7378
if self._metadata.had_multiple_clients:
7479
return await self.get_metadata()
7580
# Get local estimation (will not include changes done bo another client)

tests/integration/conftest.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,6 @@ def apify_token() -> str:
9898
return api_token
9999

100100

101-
@pytest.fixture(autouse=True)
102-
def set_token(apify_token: str, monkeypatch: pytest.MonkeyPatch) -> None:
103-
monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_token)
104-
105-
106101
@pytest.fixture
107102
def apify_client_async(apify_token: str) -> ApifyClientAsync:
108103
"""Create an instance of the ApifyClientAsync.
@@ -117,9 +112,10 @@ def apify_client_async(apify_token: str) -> ApifyClientAsync:
117112

118113

119114
@pytest.fixture
120-
async def request_queue_force_cloud() -> AsyncGenerator[RequestQueue]:
115+
async def request_queue_force_cloud(apify_token: str, monkeypatch: pytest.MonkeyPatch) -> AsyncGenerator[RequestQueue]:
121116
"""Create an instance of the Apify request queue on the platform and drop it when the test is finished."""
122117
request_queue_name = generate_unique_resource_name('request_queue')
118+
monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_token)
123119

124120
async with Actor:
125121
rq = await Actor.open_request_queue(name=request_queue_name, force_cloud=True)

tests/integration/test_actor_request_queue.py

Lines changed: 0 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -318,86 +318,6 @@ def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dic
318318
assert (stats_after['writeCount'] - stats_before['writeCount']) == 1
319319

320320

321-
async def test_request_queue_enhanced_metadata(
322-
request_queue_force_cloud: RequestQueue,
323-
apify_client_async: ApifyClientAsync,
324-
) -> None:
325-
"""Test metadata tracking.
326-
327-
Multiple clients scenarios are not guaranteed to give correct results without delay. But at least multiple clients,
328-
single producer, should be reliable on the producer side."""
329-
330-
for i in range(1, 10):
331-
await request_queue_force_cloud.add_request(Request.from_url(f'http://example.com/{i}'))
332-
# Reliable information as the API response is enhanced with local metadata estimation.
333-
assert (await request_queue_force_cloud.get_metadata()).total_request_count == i
334-
335-
# Accessed with client created explicitly with `client_key=None` should appear as distinct client
336-
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
337-
await api_client.list_head()
338-
339-
# The presence of another non-producing client should not affect the metadata
340-
for i in range(10, 20):
341-
await request_queue_force_cloud.add_request(Request.from_url(f'http://example.com/{i}'))
342-
# Reliable information as the API response is enhanced with local metadata estimation.
343-
assert (await request_queue_force_cloud.get_metadata()).total_request_count == i
344-
345-
346-
async def test_request_queue_metadata_another_client(
347-
request_queue_force_cloud: RequestQueue,
348-
apify_client_async: ApifyClientAsync,
349-
) -> None:
350-
"""Test metadata tracking. The delayed metadata should be reliable even when changed by another client."""
351-
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
352-
await api_client.add_request(Request.from_url('http://example.com/1').model_dump(by_alias=True, exclude={'id'}))
353-
354-
# Wait to be sure that the API has updated the global metadata
355-
await asyncio.sleep(10)
356-
357-
assert (await request_queue_force_cloud.get_metadata()).total_request_count == 1
358-
359-
360-
async def test_request_queue_had_multiple_clients_local(
361-
request_queue_force_cloud: RequestQueue,
362-
apify_client_async: ApifyClientAsync,
363-
) -> None:
364-
"""Test that `RequestQueue` correctly detects multiple clients.
365-
366-
Clients created with different `client_key` should appear as distinct clients."""
367-
await request_queue_force_cloud.fetch_next_request()
368-
369-
# Accessed with client created explicitly with `client_key=None` should appear as distinct client
370-
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
371-
await api_client.list_head()
372-
373-
# Check that it is correctly in the RequestQueueClient metadata
374-
assert (await request_queue_force_cloud.get_metadata()).had_multiple_clients is True
375-
376-
# Check that it is correctly in the API
377-
api_response = await api_client.get()
378-
assert api_response
379-
assert api_response['hadMultipleClients'] is True
380-
381-
382-
async def test_request_queue_not_had_multiple_clients_local(
383-
request_queue_force_cloud: RequestQueue, apify_client_async: ApifyClientAsync
384-
) -> None:
385-
"""Test that same `RequestQueue` created from Actor does not act as multiple clients."""
386-
387-
# Two calls to API to create situation where different `client_key` can set `had_multiple_clients` to True
388-
await request_queue_force_cloud.fetch_next_request()
389-
await request_queue_force_cloud.fetch_next_request()
390-
391-
# Check that it is correctly in the RequestQueueClient metadata
392-
assert (await request_queue_force_cloud.get_metadata()).had_multiple_clients is False
393-
394-
# Check that it is correctly in the API
395-
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id)
396-
api_response = await api_client.get()
397-
assert api_response
398-
assert api_response['hadMultipleClients'] is False
399-
400-
401321
async def test_request_queue_had_multiple_clients_platform(
402322
make_actor: MakeActorFunction,
403323
run_actor: RunActorFunction,

tests/integration/test_request_queue.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
from __future__ import annotations
22

3+
import asyncio
34
from typing import TYPE_CHECKING
45

56
import pytest
67

8+
from crawlee import Request
9+
710
from apify import Actor
811

912
if TYPE_CHECKING:
13+
from apify_client import ApifyClientAsync
14+
from crawlee.storages import RequestQueue
15+
1016
from .conftest import MakeActorFunction, RunActorFunction
1117

1218

@@ -1195,3 +1201,83 @@ async def consumer() -> int:
11951201
actor = await make_actor(label='rq-performance-pattern-test', main_func=main)
11961202
run_result = await run_actor(actor)
11971203
assert run_result.status == 'SUCCEEDED'
1204+
1205+
1206+
async def test_request_queue_enhanced_metadata(
1207+
request_queue_force_cloud: RequestQueue,
1208+
apify_client_async: ApifyClientAsync,
1209+
) -> None:
1210+
"""Test metadata tracking.
1211+
1212+
Multiple clients scenarios are not guaranteed to give correct results without delay. But at least multiple clients,
1213+
single producer, should be reliable on the producer side."""
1214+
1215+
for i in range(1, 10):
1216+
await request_queue_force_cloud.add_request(Request.from_url(f'http://example.com/{i}'))
1217+
# Reliable information as the API response is enhanced with local metadata estimation.
1218+
assert (await request_queue_force_cloud.get_metadata()).total_request_count == i
1219+
1220+
# Accessed with client created explicitly with `client_key=None` should appear as distinct client
1221+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
1222+
await api_client.list_head()
1223+
1224+
# The presence of another non-producing client should not affect the metadata
1225+
for i in range(10, 20):
1226+
await request_queue_force_cloud.add_request(Request.from_url(f'http://example.com/{i}'))
1227+
# Reliable information as the API response is enhanced with local metadata estimation.
1228+
assert (await request_queue_force_cloud.get_metadata()).total_request_count == i
1229+
1230+
1231+
async def test_request_queue_metadata_another_client(
1232+
request_queue_force_cloud: RequestQueue,
1233+
apify_client_async: ApifyClientAsync,
1234+
) -> None:
1235+
"""Test metadata tracking. The delayed metadata should be reliable even when changed by another client."""
1236+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
1237+
await api_client.add_request(Request.from_url('http://example.com/1').model_dump(by_alias=True, exclude={'id'}))
1238+
1239+
# Wait to be sure that the API has updated the global metadata
1240+
await asyncio.sleep(10)
1241+
1242+
assert (await request_queue_force_cloud.get_metadata()).total_request_count == 1
1243+
1244+
1245+
async def test_request_queue_had_multiple_clients(
1246+
request_queue_force_cloud: RequestQueue,
1247+
apify_client_async: ApifyClientAsync,
1248+
) -> None:
1249+
"""Test that `RequestQueue` correctly detects multiple clients.
1250+
1251+
Clients created with different `client_key` should appear as distinct clients."""
1252+
await request_queue_force_cloud.fetch_next_request()
1253+
1254+
# Accessed with client created explicitly with `client_key=None` should appear as distinct client
1255+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id, client_key=None)
1256+
await api_client.list_head()
1257+
1258+
# Check that it is correctly in the RequestQueueClient metadata
1259+
assert (await request_queue_force_cloud.get_metadata()).had_multiple_clients is True
1260+
1261+
# Check that it is correctly in the API
1262+
api_response = await api_client.get()
1263+
assert api_response
1264+
assert api_response['hadMultipleClients'] is True
1265+
1266+
1267+
async def test_request_queue_not_had_multiple_clients(
1268+
request_queue_force_cloud: RequestQueue, apify_client_async: ApifyClientAsync
1269+
) -> None:
1270+
"""Test that same `RequestQueue` created from Actor does not act as multiple clients."""
1271+
1272+
# Two calls to API to create situation where different `client_key` can set `had_multiple_clients` to True
1273+
await request_queue_force_cloud.fetch_next_request()
1274+
await request_queue_force_cloud.fetch_next_request()
1275+
1276+
# Check that it is correctly in the RequestQueueClient metadata
1277+
assert (await request_queue_force_cloud.get_metadata()).had_multiple_clients is False
1278+
1279+
# Check that it is correctly in the API
1280+
api_client = apify_client_async.request_queue(request_queue_id=request_queue_force_cloud.id)
1281+
api_response = await api_client.get()
1282+
assert api_response
1283+
assert api_response['hadMultipleClients'] is False

0 commit comments

Comments
 (0)