Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
]
keywords = ["apify", "api", "client", "automation", "crawling", "scraping"]
dependencies = [
"apify-shared<2.0.0",
"apify-shared>=2.0.0,<3.0.0",
"colorama>=0.4.0",
"impit>=0.5.3",
"more_itertools>=10.0.0",
Expand Down
4 changes: 1 addition & 3 deletions src/apify_client/_http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from urllib.parse import urlencode

import impit
from apify_shared.utils import ignore_docs

from apify_client._logging import log_context, logger_name
from apify_client._statistics import Statistics
Expand All @@ -21,7 +20,7 @@
if TYPE_CHECKING:
from collections.abc import Callable

from apify_shared.types import JSONSerializable
from apify_client._types import JSONSerializable

DEFAULT_BACKOFF_EXPONENTIAL_FACTOR = 2
DEFAULT_BACKOFF_RANDOM_FACTOR = 1
Expand All @@ -30,7 +29,6 @@


class _BaseHTTPClient:
@ignore_docs
def __init__(
self,
*,
Expand Down
42 changes: 42 additions & 0 deletions src/apify_client/_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from __future__ import annotations

from typing import Any, Generic, TypeVar

JSONSerializable = str | int | float | bool | None | dict[str, Any] | list[Any]
"""Type for representing json-serializable values. It's close enough to the real thing supported
by json.parse, and the best we can do until mypy supports recursive types. It was suggested in
a discussion with (and approved by) Guido van Rossum, so I'd consider it correct enough.
"""

T = TypeVar('T')


class ListPage(Generic[T]):
    """A single page of items returned from a list() method."""

    items: list[T]
    """List of returned objects on this page."""

    count: int
    """Count of the returned objects on this page."""

    offset: int
    """The offset of the first object specified in the API call."""

    limit: int
    """The limit on the number of returned objects specified in the API call."""

    total: int
    """Total number of objects matching the API call criteria."""

    desc: bool
    """Whether the listing is descending or not."""

    def __init__(self, data: dict) -> None:
        """Initialize a ListPage instance from the API response data.

        Args:
            data: The page data as returned by the API; missing keys fall back to defaults.
        """
        self.items = data.get('items', [])
        self.offset = data.get('offset', 0)
        self.limit = data.get('limit', 0)
        # When the API does not report a count, derive it from the items actually returned.
        self.count = data['count'] if 'count' in data else len(self.items)
        # When the total is unknown, offset + count is the best available lower bound.
        self.total = data.get('total', self.offset + self.count)
        self.desc = data.get('desc', False)
106 changes: 98 additions & 8 deletions src/apify_client/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,20 @@

import asyncio
import base64
import contextlib
import io
import json
import json as jsonlib
import random
import re
import time
from collections.abc import Callable
from datetime import datetime, timezone
from enum import Enum
from http import HTTPStatus
from typing import TYPE_CHECKING, Any, TypeVar, cast

import impit
from apify_shared.utils import (
is_content_type_json,
is_content_type_text,
is_content_type_xml,
is_file_or_bytes,
maybe_extract_enum_member_value,
)

from apify_client.errors import InvalidResponseBodyError

Expand All @@ -29,11 +28,102 @@

# Maximum nesting depth to which parse_date_fields recurses when converting date strings.
PARSE_DATE_FIELDS_MAX_DEPTH = 3
# Keys ending with this suffix (e.g. 'createdAt') are treated as date fields.
PARSE_DATE_FIELDS_KEY_SUFFIX = 'At'

# API error types that represent a missing record (or a record hidden by a missing token).
RECORD_NOT_FOUND_EXCEPTION_TYPES = ['record-not-found', 'record-or-token-not-found']

T = TypeVar('T')
# Callback a retried function can invoke to stop further retry attempts.
StopRetryingType = Callable[[], None]
# Constrained TypeVar so parse_date_fields preserves its argument's container type.
ListOrDict = TypeVar('ListOrDict', list, dict)


def filter_out_none_values_recursively(dictionary: dict) -> dict:
    """Return a copy of the dictionary with all None-valued keys recursively removed."""
    filtered = filter_out_none_values_recursively_internal(dictionary)
    # The internal variant may return None only when empty-dict removal is enabled,
    # which is not the case at the top level, hence the cast.
    return cast('dict', filtered)


def filter_out_none_values_recursively_internal(
dictionary: dict,
*,
remove_empty_dicts: bool | None = None,
) -> dict | None:
"""Recursively filters out None values from a dictionary.

Unfortunately, it's necessary to have an internal function for the correct result typing,
without having to create complicated overloads
"""
result = {}
for k, v in dictionary.items():
if isinstance(v, dict):
v = filter_out_none_values_recursively_internal( # noqa: PLW2901
v, remove_empty_dicts=remove_empty_dicts is True or remove_empty_dicts is None
)
if v is not None:
result[k] = v
if not result and remove_empty_dicts:
return None
return result


def parse_date_fields(data: ListOrDict, max_depth: int = PARSE_DATE_FIELDS_MAX_DEPTH) -> ListOrDict:
    """Recursively parse date fields in a list or dictionary up to the specified depth."""
    if max_depth < 0:
        return data

    if isinstance(data, list):
        return [parse_date_fields(item, max_depth - 1) for item in data]

    if isinstance(data, dict):
        parsed: dict = {}
        for key, value in data.items():
            if key.endswith(PARSE_DATE_FIELDS_KEY_SUFFIX) and isinstance(value, str):
                # Only values matching the exact API timestamp format are converted;
                # anything else is kept as-is.
                try:
                    parsed[key] = datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ').replace(tzinfo=timezone.utc)
                except ValueError:
                    parsed[key] = value
            elif isinstance(value, dict):
                parsed[key] = parse_date_fields(value, max_depth - 1)
            elif isinstance(value, list):
                # A list nested directly in a dict does not consume a level of depth.
                parsed[key] = parse_date_fields(value, max_depth)
            else:
                parsed[key] = value
        return parsed

    return data


def is_content_type_json(content_type: str) -> bool:
    """Check if the given content type is JSON."""
    # re.match anchors at the start of the string, so this accepts parameters
    # such as '; charset=utf-8' after the media type.
    return re.match(r'application/json', content_type, flags=re.IGNORECASE) is not None


def is_content_type_xml(content_type: str) -> bool:
    """Check if the given content type is XML."""
    # Anchored at both ends: matches e.g. 'application/xml' or 'application/xhtml+xml',
    # but not a content type carrying trailing parameters.
    return re.match(r'application/.*xml$', content_type, flags=re.IGNORECASE) is not None


def is_content_type_text(content_type: str) -> bool:
    """Check if the given content type is text."""
    return re.match(r'text/', content_type, flags=re.IGNORECASE) is not None


def is_file_or_bytes(value: Any) -> bool:
    """Check if the input value is a file-like object or bytes.

    The check for IOBase is not ideal, it would be better to use duck typing,
    but then the check would be super complex, judging from how the 'requests' library does it.
    This way should be good enough for the vast majority of use cases, if it causes issues, we can improve it later.
    """
    if isinstance(value, (bytes, bytearray)):
        return True
    return isinstance(value, io.IOBase)


def json_dumps(obj: Any) -> str:
    """Dump JSON to a string with the correct settings and serializer."""
    # ensure_ascii=False keeps non-ASCII characters readable; default=str makes
    # otherwise unserializable values fall back to their str() representation.
    serializer_options: dict[str, Any] = {'ensure_ascii': False, 'indent': 2, 'default': str}
    return json.dumps(obj, **serializer_options)


def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any:
    """Extract the value of an enumeration member if it is an Enum, otherwise return the original value."""
    return maybe_enum_member.value if isinstance(maybe_enum_member, Enum) else maybe_enum_member


def to_safe_id(id: str) -> str:
Expand Down
3 changes: 0 additions & 3 deletions src/apify_client/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from apify_shared.utils import ignore_docs

from apify_client._http_client import HTTPClient, HTTPClientAsync
from apify_client._statistics import Statistics
from apify_client.clients import (
Expand Down Expand Up @@ -61,7 +59,6 @@
class _BaseApifyClient:
http_client: HTTPClient | HTTPClientAsync

@ignore_docs
def __init__(
self,
token: str | None = None,
Expand Down
5 changes: 1 addition & 4 deletions src/apify_client/clients/base/actor_job_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
from datetime import datetime, timezone

from apify_shared.consts import ActorJobStatus
from apify_shared.utils import ignore_docs, parse_date_fields

from apify_client._utils import catch_not_found_or_throw, pluck_data
from apify_client._utils import catch_not_found_or_throw, parse_date_fields, pluck_data
from apify_client.clients.base.resource_client import ResourceClient, ResourceClientAsync
from apify_client.errors import ApifyApiError

Expand All @@ -19,7 +18,6 @@
DEFAULT_WAIT_WHEN_JOB_NOT_EXIST_SEC = 3


@ignore_docs
class ActorJobBaseClient(ResourceClient):
"""Base sub-client class for Actor runs and Actor builds."""

Expand Down Expand Up @@ -74,7 +72,6 @@ def _abort(self, *, gracefully: bool | None = None) -> dict:
return parse_date_fields(pluck_data(jsonlib.loads(response.text)))


@ignore_docs
class ActorJobBaseClientAsync(ResourceClientAsync):
"""Base async sub-client class for Actor runs and Actor builds."""

Expand Down
6 changes: 0 additions & 6 deletions src/apify_client/clients/base/base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from typing import TYPE_CHECKING, Any

from apify_shared.utils import ignore_docs

from apify_client._logging import WithLogDetailsClient
from apify_client._utils import to_safe_id

Expand Down Expand Up @@ -45,14 +43,12 @@ def _sub_resource_init_options(self, **kwargs: Any) -> dict:
}


@ignore_docs
class BaseClient(_BaseBaseClient):
"""Base class for sub-clients."""

http_client: HTTPClient
root_client: ApifyClient

@ignore_docs
def __init__(
self,
*,
Expand Down Expand Up @@ -88,14 +84,12 @@ def __init__(
self.url = f'{self.url}/{self.safe_id}'


@ignore_docs
class BaseClientAsync(_BaseBaseClient):
"""Base class for async sub-clients."""

http_client: HTTPClientAsync
root_client: ApifyClientAsync

@ignore_docs
def __init__(
self,
*,
Expand Down
6 changes: 1 addition & 5 deletions src/apify_client/clients/base/resource_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@

import json as jsonlib

from apify_shared.utils import ignore_docs, parse_date_fields

from apify_client._utils import catch_not_found_or_throw, pluck_data
from apify_client._utils import catch_not_found_or_throw, parse_date_fields, pluck_data
from apify_client.clients.base.base_client import BaseClient, BaseClientAsync
from apify_client.errors import ApifyApiError


@ignore_docs
class ResourceClient(BaseClient):
"""Base class for sub-clients manipulating a single resource."""

Expand Down Expand Up @@ -53,7 +50,6 @@ def _delete(self, timeout_secs: int | None = None) -> None:
catch_not_found_or_throw(exc)


@ignore_docs
class ResourceClientAsync(BaseClientAsync):
"""Base class for async sub-clients manipulating a single resource."""

Expand Down
7 changes: 1 addition & 6 deletions src/apify_client/clients/base/resource_collection_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
import json as jsonlib
from typing import Any, Generic, TypeVar

from apify_shared.utils import ignore_docs, parse_date_fields

from apify_client._utils import pluck_data
from apify_client._utils import parse_date_fields, pluck_data
from apify_client.clients.base.base_client import BaseClient, BaseClientAsync

T = TypeVar('T')
Expand All @@ -32,7 +30,6 @@ class ListPage(Generic[T]):
desc: bool
"""Whether the listing is descending or not"""

@ignore_docs
def __init__(self, data: dict) -> None:
"""Initialize a ListPage instance from the API response data."""
self.items = data.get('items', [])
Expand All @@ -43,7 +40,6 @@ def __init__(self, data: dict) -> None:
self.desc = data.get('desc', False)


@ignore_docs
class ResourceCollectionClient(BaseClient):
"""Base class for sub-clients manipulating a resource collection."""

Expand Down Expand Up @@ -77,7 +73,6 @@ def _get_or_create(self, name: str | None = None, resource: dict | None = None)
return parse_date_fields(pluck_data(jsonlib.loads(response.text)))


@ignore_docs
class ResourceCollectionClientAsync(BaseClientAsync):
"""Base class for async sub-clients manipulating a resource collection."""

Expand Down
10 changes: 4 additions & 6 deletions src/apify_client/clients/resource_clients/actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import json as jsonlib
from typing import TYPE_CHECKING, Any, Literal

from apify_shared.utils import (
from apify_client._utils import (
encode_key_value_store_record_value,
encode_webhook_list_to_base64,
filter_out_none_values_recursively,
ignore_docs,
maybe_extract_enum_member_value,
parse_date_fields,
pluck_data,
)

from apify_client._utils import encode_key_value_store_record_value, encode_webhook_list_to_base64, pluck_data
from apify_client.clients.base import ResourceClient, ResourceClientAsync
from apify_client.clients.resource_clients.actor_version import ActorVersionClient, ActorVersionClientAsync
from apify_client.clients.resource_clients.actor_version_collection import (
Expand Down Expand Up @@ -98,7 +98,6 @@ def get_actor_representation(
class ActorClient(ResourceClient):
"""Sub-client for manipulating a single Actor."""

@ignore_docs
def __init__(self, *args: Any, **kwargs: Any) -> None:
resource_path = kwargs.pop('resource_path', 'acts')
super().__init__(*args, resource_path=resource_path, **kwargs)
Expand Down Expand Up @@ -503,7 +502,6 @@ def validate_input(
class ActorClientAsync(ResourceClientAsync):
"""Async sub-client for manipulating a single Actor."""

@ignore_docs
def __init__(self, *args: Any, **kwargs: Any) -> None:
resource_path = kwargs.pop('resource_path', 'acts')
super().__init__(*args, resource_path=resource_path, **kwargs)
Expand Down
Loading
Loading