diff --git a/src/apify_client/_http_client.py b/src/apify_client/_http_client.py index ca4e39c1..2a23516f 100644 --- a/src/apify_client/_http_client.py +++ b/src/apify_client/_http_client.py @@ -14,6 +14,7 @@ from apify_client._errors import ApifyApiError, InvalidResponseBodyError, is_retryable_error from apify_client._logging import log_context, logger_name +from apify_client._statistics import Statistics from apify_client._utils import retry_with_exp_backoff, retry_with_exp_backoff_async if TYPE_CHECKING: @@ -35,6 +36,7 @@ def __init__( max_retries: int = 8, min_delay_between_retries_millis: int = 500, timeout_secs: int = 360, + stats: Statistics | None = None, ) -> None: self.max_retries = max_retries self.min_delay_between_retries_millis = min_delay_between_retries_millis @@ -59,6 +61,8 @@ def __init__( self.httpx_client = httpx.Client(headers=headers, follow_redirects=True, timeout=timeout_secs) self.httpx_async_client = httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=timeout_secs) + self.stats = stats or Statistics() + @staticmethod def _maybe_parse_response(response: httpx.Response) -> Any: if response.status_code == HTTPStatus.NO_CONTENT: @@ -143,6 +147,8 @@ def call( log_context.method.set(method) log_context.url.set(url) + self.stats.calls += 1 + if stream and parse_response: raise ValueError('Cannot stream response and parse it at the same time!') @@ -153,6 +159,9 @@ def call( def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response: log_context.attempt.set(attempt) logger.debug('Sending request') + + self.stats.requests += 1 + try: request = httpx_client.build_request( method=method, @@ -177,6 +186,9 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response: return response + if response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + self.stats.add_rate_limit_error(attempt) + except Exception as e: logger.debug('Request threw exception', exc_info=e) if not is_retryable_error(e): @@ -217,6 +229,8 @@ 
async def call( log_context.method.set(method) log_context.url.set(url) + self.stats.calls += 1 + if stream and parse_response: raise ValueError('Cannot stream response and parse it at the same time!') @@ -251,6 +265,9 @@ async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response return response + if response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + self.stats.add_rate_limit_error(attempt) + except Exception as e: logger.debug('Request threw exception', exc_info=e) if not is_retryable_error(e): diff --git a/src/apify_client/_statistics.py b/src/apify_client/_statistics.py new file mode 100644 index 00000000..d06d8d82 --- /dev/null +++ b/src/apify_client/_statistics.py @@ -0,0 +1,27 @@ +from collections import defaultdict +from dataclasses import dataclass, field + + +@dataclass +class Statistics: + """Statistics about API client usage and rate limit errors.""" + + calls: int = 0 + """Total number of API method calls made by the client.""" + + requests: int = 0 + """Total number of HTTP requests sent, including retries.""" + + rate_limit_errors: defaultdict[int, int] = field(default_factory=lambda: defaultdict(int)) + """Mapping of zero-based attempt index to the count of rate limit (429) errors on that attempt.""" + + def add_rate_limit_error(self, attempt: int) -> None: + """Add rate limit error for specific attempt. + + Args: + attempt: The attempt number (1-based indexing). 
+ """ + if attempt < 1: + raise ValueError('Attempt must be greater than 0') + + self.rate_limit_errors[attempt - 1] += 1 diff --git a/src/apify_client/client.py b/src/apify_client/client.py index 09a04a0d..e26bb736 100644 --- a/src/apify_client/client.py +++ b/src/apify_client/client.py @@ -3,6 +3,7 @@ from apify_shared.utils import ignore_docs from apify_client._http_client import HTTPClient, HTTPClientAsync +from apify_client._statistics import Statistics from apify_client.clients import ( ActorClient, ActorClientAsync, @@ -126,11 +127,13 @@ def __init__( timeout_secs=timeout_secs, ) + self.stats = Statistics() self.http_client = HTTPClient( token=token, max_retries=self.max_retries, min_delay_between_retries_millis=self.min_delay_between_retries_millis, timeout_secs=self.timeout_secs, + stats=self.stats, ) def actor(self, actor_id: str) -> ActorClient: diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py new file mode 100644 index 00000000..9775aa01 --- /dev/null +++ b/tests/unit/test_statistics.py @@ -0,0 +1,44 @@ +import pytest + +from apify_client._statistics import Statistics + + +@pytest.mark.parametrize( + ('attempts', 'expected_errors'), + [ + pytest.param([1], {0: 1}, id='single error'), + pytest.param([1, 5], {0: 1, 4: 1}, id='two single errors'), + pytest.param([5, 1], {0: 1, 4: 1}, id='two single errors reversed'), + pytest.param([3, 5, 1], {0: 1, 2: 1, 4: 1}, id='three single errors'), + pytest.param([1, 5, 3], {0: 1, 2: 1, 4: 1}, id='three single errors reordered'), + pytest.param([2, 1, 2, 1, 5, 2, 1], {0: 3, 1: 3, 4: 1}, id='multiple errors per attempt'), + ], +) +def test_add_rate_limit_error(attempts: list[int], expected_errors: list[int]) -> None: + """Test that add_rate_limit_error correctly tracks errors for different attempt sequences.""" + stats = Statistics() + for attempt in attempts: + stats.add_rate_limit_error(attempt) + assert stats.rate_limit_errors == expected_errors + + +def 
test_add_rate_limit_error_invalid_attempt() -> None: + """Test that add_rate_limit_error raises ValueError for invalid attempt.""" + stats = Statistics() + with pytest.raises(ValueError, match='Attempt must be greater than 0'): + stats.add_rate_limit_error(0) + + +def test_statistics_initial_state() -> None: + """Test initial state of Statistics instance.""" + stats = Statistics() + assert stats.calls == 0 + assert stats.requests == 0 + assert stats.rate_limit_errors == {} + + +def test_add_rate_limit_error_type_validation() -> None: + """Test type validation in add_rate_limit_error.""" + stats = Statistics() + with pytest.raises(TypeError): + stats.add_rate_limit_error('1') # type: ignore[arg-type]