Skip to content

Commit 10bc7e2

Browse files
committed
Merge remote-tracking branch 'origin/master' into no-locking-queue
2 parents 7ec13ef + 21f6782 commit 10bc7e2

File tree

3 files changed

+63
-24
lines changed

3 files changed

+63
-24
lines changed

src/apify/storage_clients/_apify/_models.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from pydantic import BaseModel, ConfigDict, Field
77

8-
from crawlee.storage_clients.models import KeyValueStoreMetadata
8+
from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata
99

1010
from apify import Request
1111
from apify._utils import docs_group
@@ -105,3 +105,27 @@ class CachedRequest(BaseModel):
105105

106106
lock_expires_at: datetime | None = None
107107
"""The expiration time of the lock on the request."""
108+
109+
110+
class RequestQueueStats(BaseModel):
111+
model_config = ConfigDict(populate_by_name=True)
112+
113+
delete_count: Annotated[int, Field(alias='deleteCount', default=0)]
114+
""""The number of request queue deletes."""
115+
116+
head_item_read_count: Annotated[int, Field(alias='headItemReadCount', default=0)]
117+
"""The number of request queue head reads."""
118+
119+
read_count: Annotated[int, Field(alias='readCount', default=0)]
120+
"""The number of request queue reads."""
121+
122+
storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
123+
"""Storage size in Bytes."""
124+
125+
write_count: Annotated[int, Field(alias='writeCount', default=0)]
126+
"""The number of request queue writes."""
127+
128+
129+
class ApifyRequestQueueMetadata(RequestQueueMetadata):
130+
stats: Annotated[RequestQueueStats, Field(alias='stats', default_factory=RequestQueueStats)]
131+
"""Additional statistics about the request queue."""

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from crawlee.storage_clients.models import RequestQueueMetadata
1515
from crawlee.storages import RequestQueue
1616

17+
from ._models import ApifyRequestQueueMetadata, RequestQueueStats
1718
from ._utils import AliasResolver
1819

1920
if TYPE_CHECKING:
@@ -70,7 +71,7 @@ def __init__(
7071
"""Additional data related to the RequestQueue."""
7172

7273
@override
73-
async def get_metadata(self) -> RequestQueueMetadata:
74+
async def get_metadata(self) -> ApifyRequestQueueMetadata:
7475
"""Get metadata about the request queue.
7576
7677
Returns:
@@ -81,7 +82,7 @@ async def get_metadata(self) -> RequestQueueMetadata:
8182
if response is None:
8283
raise ValueError('Failed to fetch request queue metadata from the API.')
8384
# Enhance API response by local estimations (API can be delayed few seconds, while local estimation not.)
84-
return RequestQueueMetadata(
85+
return ApifyRequestQueueMetadata(
8586
id=response['id'],
8687
name=response['name'],
8788
total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
@@ -91,6 +92,7 @@ async def get_metadata(self) -> RequestQueueMetadata:
9192
modified_at=max(response['modifiedAt'], self._metadata.modified_at),
9293
accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
9394
had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients,
95+
stats=RequestQueueStats.model_validate(response['stats'], by_alias=True),
9496
)
9597

9698
@classmethod

tests/integration/test_request_queue.py

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import asyncio
44
from datetime import datetime, timezone
5-
from typing import TYPE_CHECKING
5+
from typing import TYPE_CHECKING, cast
66

77
import pytest
88

@@ -20,6 +20,7 @@
2020
from crawlee._types import BasicCrawlingContext
2121

2222
from .conftest import MakeActorFunction, RunActorFunction
23+
from apify.storage_clients._apify._models import ApifyRequestQueueMetadata
2324

2425

2526
async def test_add_and_fetch_requests(default_request_queue_apify: RequestQueue) -> None:
@@ -1126,14 +1127,9 @@ async def default_handler(context: BasicCrawlingContext) -> None:
11261127
# Check the request queue stats
11271128
await asyncio.sleep(10) # Wait to be sure that metadata are updated
11281129

1129-
# Get raw client, because stats are not exposed in `RequestQueue` class, but are available in raw client
1130-
# https://github.com/apify/apify-sdk-python/pull/574
1131-
rq_client = Actor.apify_client.request_queue(request_queue_id=rq.id)
1132-
_rq = await rq_client.get()
1133-
assert _rq
1134-
request_queue_stats = _rq.get('stats', {})
1135-
Actor.log.info(f'{request_queue_stats=}')
1136-
assert request_queue_stats['writeCount'] == requests * expected_write_count_per_request
1130+
metadata = cast('ApifyRequestQueueMetadata', await rq.get_metadata())
1131+
Actor.log.info(f'{metadata.stats=}')
1132+
assert metadata.stats.write_count == requests * expected_write_count_per_request
11371133
await rq.drop()
11381134

11391135

@@ -1153,13 +1149,8 @@ async def test_cache_initialization(apify_token: str, monkeypatch: pytest.Monkey
11531149

11541150
# Check that it is correctly in the API
11551151
await asyncio.sleep(10) # Wait to be sure that metadata are updated
1156-
1157-
# Get raw client, because stats are not exposed in `RequestQueue` class, but are available in raw client
1158-
# https://github.com/apify/apify-sdk-python/pull/574
1159-
rq_client = Actor.apify_client.request_queue(request_queue_id=rq.id)
1160-
_rq = await rq_client.get()
1161-
assert _rq
1162-
stats_before = _rq.get('stats', {})
1152+
metadata = cast('ApifyRequestQueueMetadata', await rq.get_metadata())
1153+
stats_before = metadata.stats
11631154
Actor.log.info(stats_before)
11641155

11651156
# Clear service locator cache to simulate creating RQ instance from scratch
@@ -1170,15 +1161,37 @@ async def test_cache_initialization(apify_token: str, monkeypatch: pytest.Monkey
11701161
await rq.add_requests(requests)
11711162

11721163
await asyncio.sleep(10) # Wait to be sure that metadata are updated
1173-
_rq = await rq_client.get()
1174-
assert _rq
1175-
stats_after = _rq.get('stats', {})
1164+
metadata = cast('ApifyRequestQueueMetadata', await rq.get_metadata())
1165+
stats_after = metadata.stats
11761166
Actor.log.info(stats_after)
11771167

11781168
# Cache was actually initialized, readCount increased
1179-
assert (stats_after['readCount'] - stats_before['readCount']) == len(requests)
1169+
assert (stats_after.read_count - stats_before.read_count) == len(requests)
11801170
# Deduplication happened locally, writeCount should be the same
1181-
assert stats_after['writeCount'] == stats_before['writeCount']
1171+
assert stats_after.write_count == stats_before.write_count
11821172

11831173
finally:
11841174
await rq.drop()
1175+
1176+
1177+
async def test_request_queue_has_stats(request_queue_force_cloud: RequestQueue) -> None:
1178+
"""Test that Apify based request queue has stats in metadata."""
1179+
1180+
add_request_count = 3
1181+
read_request_count = 2
1182+
1183+
await request_queue_force_cloud.add_requests(
1184+
[Request.from_url(f'http://example.com/{i}') for i in range(add_request_count)]
1185+
)
1186+
for _ in range(read_request_count):
1187+
await request_queue_force_cloud.get_request(Request.from_url('http://example.com/1').unique_key)
1188+
1189+
# Wait for stats to become stable
1190+
await asyncio.sleep(10)
1191+
1192+
metadata = await request_queue_force_cloud.get_metadata()
1193+
1194+
assert hasattr(metadata, 'stats')
1195+
apify_metadata = cast('ApifyRequestQueueMetadata', metadata)
1196+
assert apify_metadata.stats.read_count == read_request_count
1197+
assert apify_metadata.stats.write_count == add_request_count

0 commit comments

Comments
 (0)