diff --git a/src/apify/storage_clients/_apify/_models.py b/src/apify/storage_clients/_apify/_models.py index 993ea8db..b1b3a425 100644 --- a/src/apify/storage_clients/_apify/_models.py +++ b/src/apify/storage_clients/_apify/_models.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, ConfigDict, Field -from crawlee.storage_clients.models import KeyValueStoreMetadata +from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata from apify import Request from apify._utils import docs_group @@ -105,3 +105,27 @@ class CachedRequest(BaseModel): lock_expires_at: datetime | None = None """The expiration time of the lock on the request.""" + + +class RequestQueueStats(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + delete_count: Annotated[int, Field(alias='deleteCount', default=0)] + """"The number of request queue deletes.""" + + head_item_read_count: Annotated[int, Field(alias='headItemReadCount', default=0)] + """The number of request queue head reads.""" + + read_count: Annotated[int, Field(alias='readCount', default=0)] + """The number of request queue reads.""" + + storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)] + """Storage size in Bytes.""" + + write_count: Annotated[int, Field(alias='writeCount', default=0)] + """The number of request queue writes.""" + + +class ApifyRequestQueueMetadata(RequestQueueMetadata): + stats: Annotated[RequestQueueStats, Field(alias='stats', default_factory=RequestQueueStats)] + """Additional statistics about the request queue.""" diff --git a/src/apify/storage_clients/_apify/_request_queue_client.py b/src/apify/storage_clients/_apify/_request_queue_client.py index 3ffefcd0..39556d2d 100644 --- a/src/apify/storage_clients/_apify/_request_queue_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_client.py @@ -18,7 +18,13 @@ from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata from crawlee.storages import RequestQueue -from ._models import CachedRequest, ProlongRequestLockResponse, RequestQueueHead +from ._models import ( + ApifyRequestQueueMetadata, + CachedRequest, + ProlongRequestLockResponse, + RequestQueueHead, + RequestQueueStats, +) from ._utils import AliasResolver from apify import Request @@ -108,7 +114,7 @@ async def _get_metadata_estimate(self) -> RequestQueueMetadata: return self._metadata @override - async def get_metadata(self) -> RequestQueueMetadata: + async def get_metadata(self) -> ApifyRequestQueueMetadata: """Get metadata about the request queue. Returns: @@ -119,7 +125,7 @@ async def get_metadata(self) -> RequestQueueMetadata: if response is None: raise ValueError('Failed to fetch request queue metadata from the API.') # Enhance API response by local estimations (API can be delayed few seconds, while local estimation not.) - return RequestQueueMetadata( + return ApifyRequestQueueMetadata( id=response['id'], name=response['name'], total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count), @@ -129,6 +135,7 @@ async def get_metadata(self) -> RequestQueueMetadata: modified_at=max(response['modifiedAt'], self._metadata.modified_at), accessed_at=max(response['accessedAt'], self._metadata.accessed_at), had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients, + stats=RequestQueueStats.model_validate(response['stats'], by_alias=True), ) @classmethod diff --git a/tests/integration/test_request_queue.py b/tests/integration/test_request_queue.py index ed913b89..17ef094f 100644 --- a/tests/integration/test_request_queue.py +++ b/tests/integration/test_request_queue.py @@ -1,7 +1,7 @@ from __future__ import annotations import asyncio -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast import pytest @@ -14,6 +14,7 @@ from crawlee.storages import RequestQueue from .conftest import MakeActorFunction, RunActorFunction + from apify.storage_clients._apify._models import ApifyRequestQueueMetadata async def test_add_and_fetch_requests( @@ -1278,3 +1279,26 @@ async def test_request_queue_not_had_multiple_clients( api_response = await api_client.get() assert api_response assert api_response['hadMultipleClients'] is False + + +async def test_request_queue_has_stats(request_queue_force_cloud: RequestQueue) -> None: + """Test that Apify based request queue has stats in metadata.""" + + add_request_count = 3 + read_request_count = 2 + + await request_queue_force_cloud.add_requests( + [Request.from_url(f'http://example.com/{i}') for i in range(add_request_count)] + ) + for _ in range(read_request_count): + await request_queue_force_cloud.get_request(Request.from_url('http://example.com/1').unique_key) + + # Wait for stats to become stable + await asyncio.sleep(10) + + metadata = await request_queue_force_cloud.get_metadata() + + assert hasattr(metadata, 'stats') + apify_metadata = cast('ApifyRequestQueueMetadata', metadata) + assert apify_metadata.stats.read_count == read_request_count + assert apify_metadata.stats.write_count == add_request_count