Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/crawlee/_autoscaling/snapshotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,11 +305,11 @@ def _snapshot_client(self) -> None:
Only errors produced by a 2nd retry of the API call are considered for snapshotting since earlier errors may
just be caused by a random spike in the number of requests and do not necessarily signify API overloading.
"""
# TODO: This is just a dummy placeholder. It can be implemented once `StorageClient` is ready.
# Attribute `self._client_rate_limit_error_retry_count` will be used here.
# https://github.com/apify/crawlee-python/issues/60
client = service_locator.get_storage_client()

error_count = 0
rate_limit_errors: dict[int, int] = client.get_rate_limit_errors()

error_count = rate_limit_errors.get(self._CLIENT_RATE_LIMIT_ERROR_RETRY_COUNT, 0)
snapshot = ClientSnapshot(error_count=error_count, max_error_count=self._max_client_errors)

snapshots = cast(list[Snapshot], self._client_snapshots)
Expand Down
4 changes: 4 additions & 0 deletions src/crawlee/storage_clients/_base/_base_storage_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,7 @@ async def purge_on_start(self) -> None:
It is primarily used to clean up residual data from previous runs to maintain a clean state.
If the storage client does not support purging, leave it empty.
"""

def get_rate_limit_errors(self) -> dict[int, int]:
    """Return rate limit error statistics from the storage client's HTTP client.

    The base implementation collects nothing and always yields an empty mapping;
    concrete storage clients may override this to report real counts.
    """
    empty_stats: dict[int, int] = {}
    return empty_stats
Loading