2 changes: 1 addition & 1 deletion pyproject.toml
@@ -36,7 +36,7 @@ keywords = [
dependencies = [
"apify-client>=2.0.0,<3.0.0",
"apify-shared>=2.0.0,<3.0.0",
"crawlee==1.0.0rc1",
"crawlee==0.6.13b37",
"cachetools>=5.5.0",
"cryptography>=42.0.0",
"impit>=0.5.3",
36 changes: 24 additions & 12 deletions src/apify/_actor.py
@@ -401,6 +401,7 @@ async def open_dataset(
self,
*,
id: str | None = None,
alias: str | None = None,
name: str | None = None,
force_cloud: bool = False,
) -> Dataset:
@@ -411,10 +412,12 @@ async def open_dataset(
the Apify cloud.

Args:
id: ID of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
the default dataset associated with the Actor run.
name: Name of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
the default dataset associated with the Actor run.
id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
Mutually exclusive with name and alias.
name: The name of the dataset to open (global scope, persists across runs).
Mutually exclusive with id and alias.
alias: The alias of the dataset to open (run scope, creates unnamed storage).
Mutually exclusive with id and name.
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
to combine local and cloud storage.

@@ -428,6 +431,7 @@ async def open_dataset(

return await Dataset.open(
id=id,
alias=alias,
name=name,
configuration=self._configuration,
storage_client=storage_client,
@@ -437,6 +441,7 @@ async def open_key_value_store(
self,
*,
id: str | None = None,
alias: str | None = None,
name: str | None = None,
force_cloud: bool = False,
) -> KeyValueStore:
@@ -446,10 +451,12 @@ async def open_key_value_store(
and retrieved using a unique key. The actual data is stored either on a local filesystem or in the Apify cloud.

Args:
id: ID of the key-value store to be opened. If neither `id` nor `name` are provided, the method returns
the default key-value store associated with the Actor run.
name: Name of the key-value store to be opened. If neither `id` nor `name` are provided, the method
returns the default key-value store associated with the Actor run.
id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
Mutually exclusive with name and alias.
name: The name of the KVS to open (global scope, persists across runs).
Mutually exclusive with id and alias.
alias: The alias of the KVS to open (run scope, creates unnamed storage).
Mutually exclusive with id and name.
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
to combine local and cloud storage.

@@ -462,6 +469,7 @@ async def open_key_value_store(

return await KeyValueStore.open(
id=id,
alias=alias,
name=name,
configuration=self._configuration,
storage_client=storage_client,
@@ -471,6 +479,7 @@ async def open_request_queue(
self,
*,
id: str | None = None,
alias: str | None = None,
name: str | None = None,
force_cloud: bool = False,
) -> RequestQueue:
@@ -482,10 +491,12 @@ async def open_request_queue(
crawling orders.

Args:
id: ID of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
the default request queue associated with the Actor run.
name: Name of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
the default request queue associated with the Actor run.
id: The ID of the RQ to open. If provided, searches for existing RQ by ID.
Mutually exclusive with name and alias.
name: The name of the RQ to open (global scope, persists across runs).
Mutually exclusive with id and alias.
alias: The alias of the RQ to open (run scope, creates unnamed storage).
Mutually exclusive with id and name.
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
to combine local and cloud storage.

@@ -499,6 +510,7 @@ async def open_request_queue(

return await RequestQueue.open(
id=id,
alias=alias,
name=name,
configuration=self._configuration,
storage_client=storage_client,
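Taken together, the `_actor.py` changes let an Actor open run-scoped storages by `alias` in addition to the existing `id` and `name` options. A minimal usage sketch (the alias value, store name, keys, and pushed data below are illustrative, not part of this diff):

```python
import asyncio

from apify import Actor


async def main() -> None:
    async with Actor:
        # Run-scoped alias: creates (or reuses) an unnamed dataset tied to this run.
        scratch = await Actor.open_dataset(alias='scratch-results')
        await scratch.push_data({'url': 'https://example.com', 'status': 'ok'})

        # Global name: persists across runs and is addressable by name on the platform.
        shared = await Actor.open_key_value_store(name='shared-config')
        await shared.set_value('last-run', {'ok': True})


if __name__ == '__main__':
    asyncio.run(main())
```

Per the updated docstrings, an aliased storage behaves like the default one in that it is unnamed and scoped to the run, while a named storage persists globally across runs.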
4 changes: 2 additions & 2 deletions src/apify/events/__init__.py
@@ -1,5 +1,5 @@
from crawlee.events import EventManager, LocalEventManager
from crawlee.events import Event, EventManager, LocalEventManager

from ._apify_event_manager import ApifyEventManager

__all__ = ['ApifyEventManager', 'EventManager', 'LocalEventManager']
__all__ = ['ApifyEventManager', 'Event', 'EventManager', 'LocalEventManager']
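Re-exporting `Event` from `apify.events` lets user code import the event enum from the SDK package itself rather than from `crawlee.events`, for example to register listeners on platform events. A short sketch, assuming the usual `Actor.on` listener API (not part of this diff):

```python
from apify import Actor
from apify.events import Event


async def main() -> None:
    async with Actor:
        # Persist intermediate state when the platform is about to migrate the run.
        async def on_migrating(event_data: object) -> None:
            await Actor.set_value('checkpoint', {'processed': 42})

        Actor.on(Event.MIGRATING, on_migrating)
```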
60 changes: 38 additions & 22 deletions src/apify/storage_clients/_apify/_dataset_client.py
@@ -12,6 +12,8 @@
from crawlee.storage_clients._base import DatasetClient
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata

from ._utils import resolve_alias_to_id, store_alias_mapping

if TYPE_CHECKING:
from collections.abc import AsyncIterator

@@ -66,6 +68,7 @@ async def open(
*,
id: str | None,
name: str | None,
alias: str | None,
configuration: Configuration,
) -> ApifyDatasetClient:
"""Open an Apify dataset client.
@@ -74,22 +77,27 @@ async def open(
It handles authentication, storage lookup/creation, and metadata retrieval.

Args:
id: The ID of an existing dataset to open. If provided, the client will connect to this specific storage.
Cannot be used together with `name`.
name: The name of a dataset to get or create. If a storage with this name exists, it will be opened;
otherwise, a new one will be created. Cannot be used together with `id`.
id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
Mutually exclusive with name and alias.
name: The name of the dataset to open (global scope, persists across runs).
Mutually exclusive with id and alias.
alias: The alias of the dataset to open (run scope, creates unnamed storage).
Mutually exclusive with id and name.
configuration: The configuration object containing API credentials and settings. Must include a valid
`token` and `api_base_url`. May also contain a `default_dataset_id` for fallback when neither
`id` nor `name` is provided.
`id`, `name`, nor `alias` is provided.

Returns:
An instance for the opened or created storage client.

Raises:
ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
are provided, or if neither `id` nor `name` is provided and no default storage ID is available in
the configuration.
ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
`id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
in the configuration.
"""
if sum(1 for param in [id, name, alias] if param is not None) > 1:
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

token = configuration.token
if not token:
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -115,27 +123,35 @@ async def open(
)
apify_datasets_client = apify_client_async.datasets()

# If both id and name are provided, raise an error.
if id and name:
raise ValueError('Only one of "id" or "name" can be specified, not both.')
# Normalize 'default' alias to None
alias = None if alias == 'default' else alias

# If id is provided, get the storage by ID.
if id and name is None:
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
# Handle alias resolution
if alias:
# Try to resolve alias to existing storage ID
resolved_id = await resolve_alias_to_id(alias, 'dataset', configuration)
if resolved_id:
id = resolved_id
else:
# Create a new storage and store the alias mapping
new_storage_metadata = DatasetMetadata.model_validate(
await apify_datasets_client.get_or_create(),
)
id = new_storage_metadata.id
await store_alias_mapping(alias, 'dataset', id, configuration)

# If name is provided, get or create the storage by name.
if name and id is None:
elif name:
id = DatasetMetadata.model_validate(
await apify_datasets_client.get_or_create(name=name),
).id
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

# If both id and name are None, try to get the default storage ID from environment variables.
# The default storage ID environment variable is set by the Apify platform. It also contains
# a new storage ID after Actor's reboot or migration.
if id is None and name is None:
# If none are provided, try to get the default storage ID from environment variables.
elif id is None:
id = configuration.default_dataset_id
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

# Now create the client for the determined ID
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

# Fetch its metadata.
metadata = await apify_dataset_client.get()
@@ -150,7 +166,7 @@ async def open(
# Verify that the storage exists by fetching its metadata again.
metadata = await apify_dataset_client.get()
if metadata is None:
raise ValueError(f'Opening dataset with id={id} and name={name} failed.')
raise ValueError(f'Opening dataset with id={id}, name={name}, and alias={alias} failed.')

return cls(
api_client=apify_dataset_client,
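The alias branch above relies on two helpers imported from `._utils`, `resolve_alias_to_id` and `store_alias_mapping`, whose implementation is not included in this excerpt. A hypothetical sketch of what they could look like, assuming the alias-to-ID mapping is persisted as a record in the run's default key-value store (the record key, mapping layout, and client wiring are assumptions, not the PR's actual code):

```python
# Hypothetical sketch of the ._utils helpers -- the real module is not shown in this
# diff; the record key, mapping layout, and client wiring are assumptions.
from apify import Configuration
from apify_client import ApifyClientAsync

_ALIAS_MAPPING_KEY = '__STORAGE_ALIASES'  # assumed record name in the default KVS


def _alias_kvs_client(configuration: Configuration):
    """Build a client for the run's default key-value store, where the mapping is kept."""
    client = ApifyClientAsync(token=configuration.token, api_url=configuration.api_base_url)
    return client.key_value_store(configuration.default_key_value_store_id)


async def resolve_alias_to_id(alias: str, storage_type: str, configuration: Configuration) -> str | None:
    """Return the storage ID previously recorded for (storage_type, alias), if any."""
    record = await _alias_kvs_client(configuration).get_record(_ALIAS_MAPPING_KEY)
    mapping: dict[str, str] = (record or {}).get('value') or {}
    return mapping.get(f'{storage_type}:{alias}')


async def store_alias_mapping(alias: str, storage_type: str, storage_id: str, configuration: Configuration) -> None:
    """Record that (storage_type, alias) resolves to storage_id for subsequent opens in this run."""
    kvs = _alias_kvs_client(configuration)
    record = await kvs.get_record(_ALIAS_MAPPING_KEY)
    mapping: dict[str, str] = (record or {}).get('value') or {}
    mapping[f'{storage_type}:{alias}'] = storage_id
    await kvs.set_record(_ALIAS_MAPPING_KEY, mapping)
```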
59 changes: 37 additions & 22 deletions src/apify/storage_clients/_apify/_key_value_store_client.py
@@ -12,6 +12,7 @@
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata

from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
from ._utils import resolve_alias_to_id, store_alias_mapping
from apify._crypto import create_hmac_signature

if TYPE_CHECKING:
@@ -58,6 +59,7 @@ async def open(
*,
id: str | None,
name: str | None,
alias: str | None,
configuration: Configuration,
) -> ApifyKeyValueStoreClient:
"""Open an Apify key-value store client.
@@ -66,22 +68,27 @@ async def open(
It handles authentication, storage lookup/creation, and metadata retrieval.

Args:
id: The ID of an existing key-value store to open. If provided, the client will connect to this specific
storage. Cannot be used together with `name`.
name: The name of a key-value store to get or create. If a storage with this name exists, it will be
opened; otherwise, a new one will be created. Cannot be used together with `id`.
id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
Mutually exclusive with name and alias.
name: The name of the KVS to open (global scope, persists across runs).
Mutually exclusive with id and alias.
alias: The alias of the KVS to open (run scope, creates unnamed storage).
Mutually exclusive with id and name.
configuration: The configuration object containing API credentials and settings. Must include a valid
`token` and `api_base_url`. May also contain a `default_key_value_store_id` for fallback when
neither `id` nor `name` is provided.
neither `id`, `name`, nor `alias` is provided.

Returns:
An instance for the opened or created storage client.

Raises:
ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
are provided, or if neither `id` nor `name` is provided and no default storage ID is available
ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
`id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
in the configuration.
"""
if sum(1 for param in [id, name, alias] if param is not None) > 1:
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

token = configuration.token
if not token:
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -107,27 +114,35 @@ async def open(
)
apify_kvss_client = apify_client_async.key_value_stores()

# If both id and name are provided, raise an error.
if id and name:
raise ValueError('Only one of "id" or "name" can be specified, not both.')

# If id is provided, get the storage by ID.
if id and name is None:
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
# Normalize 'default' alias to None
alias = None if alias == 'default' else alias

# Handle alias resolution
if alias:
# Try to resolve alias to existing storage ID
resolved_id = await resolve_alias_to_id(alias, 'kvs', configuration)
if resolved_id:
id = resolved_id
else:
# Create a new storage and store the alias mapping
new_storage_metadata = ApifyKeyValueStoreMetadata.model_validate(
await apify_kvss_client.get_or_create(),
)
id = new_storage_metadata.id
await store_alias_mapping(alias, 'kvs', id, configuration)

# If name is provided, get or create the storage by name.
if name and id is None:
elif name:
id = ApifyKeyValueStoreMetadata.model_validate(
await apify_kvss_client.get_or_create(name=name),
).id
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

# If both id and name are None, try to get the default storage ID from environment variables.
# The default storage ID environment variable is set by the Apify platform. It also contains
# a new storage ID after Actor's reboot or migration.
if id is None and name is None:
# If none are provided, try to get the default storage ID from environment variables.
elif id is None:
id = configuration.default_key_value_store_id
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

# Now create the client for the determined ID
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

# Fetch its metadata.
metadata = await apify_kvs_client.get()
@@ -142,7 +157,7 @@ async def open(
# Verify that the storage exists by fetching its metadata again.
metadata = await apify_kvs_client.get()
if metadata is None:
raise ValueError(f'Opening key-value store with id={id} and name={name} failed.')
raise ValueError(f'Opening key-value store with id={id}, name={name}, and alias={alias} failed.')

return cls(
api_client=apify_kvs_client,
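Two behaviors are shared by both storage clients above: passing more than one of `id`, `name`, and `alias` raises a `ValueError`, and `alias='default'` is normalized to `None` so it falls through to the run's default storage. An illustrative snippet (assuming the Apify storage client is active, e.g. when running on the platform; not part of this diff):

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        # alias='default' is normalized away, so this opens the run's default key-value store.
        default_kvs = await Actor.open_key_value_store(alias='default')
        await default_kvs.set_value('started', True)

        # More than one of id / name / alias is rejected.
        try:
            await Actor.open_key_value_store(name='my-store', alias='my-alias')
        except ValueError as exc:
            Actor.log.warning(f'Rejected as expected: {exc}')
```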