2 changes: 1 addition & 1 deletion pyproject.toml
@@ -36,7 +36,7 @@ keywords = [
dependencies = [
"apify-client>=2.0.0,<3.0.0",
"apify-shared>=2.0.0,<3.0.0",
"crawlee==1.0.0rc1",
"crawlee==0.6.13b37",
"cachetools>=5.5.0",
"cryptography>=42.0.0",
"impit>=0.5.3",
36 changes: 24 additions & 12 deletions src/apify/_actor.py
@@ -401,6 +401,7 @@ async def open_dataset(
self,
*,
id: str | None = None,
alias: str | None = None,
name: str | None = None,
force_cloud: bool = False,
) -> Dataset:
@@ -411,10 +412,12 @@ async def open_dataset(
the Apify cloud.

Args:
id: ID of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
the default dataset associated with the Actor run.
name: Name of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
the default dataset associated with the Actor run.
id: The ID of the dataset to open. If provided, searches for an existing dataset by ID.
Mutually exclusive with `name` and `alias`.
name: The name of the dataset to open (global scope; persists across runs).
Mutually exclusive with `id` and `alias`.
alias: The alias of the dataset to open (run scope; creates an unnamed storage).
Mutually exclusive with `id` and `name`.
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
to combine local and cloud storage.

@@ -428,6 +431,7 @@ async def open_dataset(

return await Dataset.open(
id=id,
alias=alias,
name=name,
configuration=self._configuration,
storage_client=storage_client,
@@ -437,6 +441,7 @@ async def open_key_value_store(
self,
*,
id: str | None = None,
alias: str | None = None,
name: str | None = None,
force_cloud: bool = False,
) -> KeyValueStore:
@@ -446,10 +451,12 @@ async def open_key_value_store(
and retrieved using a unique key. The actual data is stored either on a local filesystem or in the Apify cloud.

Args:
id: ID of the key-value store to be opened. If neither `id` nor `name` are provided, the method returns
the default key-value store associated with the Actor run.
name: Name of the key-value store to be opened. If neither `id` nor `name` are provided, the method
returns the default key-value store associated with the Actor run.
id: The ID of the key-value store to open. If provided, searches for an existing key-value store by ID.
Mutually exclusive with `name` and `alias`.
name: The name of the key-value store to open (global scope; persists across runs).
Mutually exclusive with `id` and `alias`.
alias: The alias of the key-value store to open (run scope; creates an unnamed storage).
Mutually exclusive with `id` and `name`.
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
to combine local and cloud storage.

@@ -462,6 +469,7 @@ async def open_key_value_store(

return await KeyValueStore.open(
id=id,
alias=alias,
name=name,
configuration=self._configuration,
storage_client=storage_client,
@@ -471,6 +479,7 @@ async def open_request_queue(
self,
*,
id: str | None = None,
alias: str | None = None,
name: str | None = None,
force_cloud: bool = False,
) -> RequestQueue:
@@ -482,10 +491,12 @@ async def open_request_queue(
crawling orders.

Args:
id: ID of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
the default request queue associated with the Actor run.
name: Name of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
the default request queue associated with the Actor run.
id: The ID of the request queue to open. If provided, searches for an existing request queue by ID.
Mutually exclusive with `name` and `alias`.
name: The name of the request queue to open (global scope; persists across runs).
Mutually exclusive with `id` and `alias`.
alias: The alias of the request queue to open (run scope; creates an unnamed storage).
Mutually exclusive with `id` and `name`.
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
to combine local and cloud storage.

@@ -499,6 +510,7 @@ async def open_request_queue(

return await RequestQueue.open(
id=id,
alias=alias,
name=name,
configuration=self._configuration,
storage_client=storage_client,
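To make the new `alias` parameter concrete from a caller's perspective, here is a minimal usage sketch. It is not part of the diff; the alias, store name, and data values are illustrative, and the run-scoped vs. global-scope semantics are taken from the new docstrings above.

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        # Run-scoped, unnamed dataset tracked under an alias (new in this PR).
        results = await Actor.open_dataset(alias='intermediate-results')
        await results.push_data({'url': 'https://example.com', 'status': 200})

        # Globally named key-value store that persists across runs.
        config_store = await Actor.open_key_value_store(name='shared-config')
        settings = await config_store.get_value('settings')
        Actor.log.info('Loaded settings: %s', settings)

        # With no id, name, or alias, the run's default request queue is opened.
        queue = await Actor.open_request_queue()
        await queue.add_request('https://example.com')
```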
4 changes: 2 additions & 2 deletions src/apify/events/__init__.py
@@ -1,5 +1,5 @@
from crawlee.events import EventManager, LocalEventManager
from crawlee.events import Event, EventManager, LocalEventManager

from ._apify_event_manager import ApifyEventManager

__all__ = ['ApifyEventManager', 'EventManager', 'LocalEventManager']
__all__ = ['ApifyEventManager', 'Event', 'EventManager', 'LocalEventManager']
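Re-exporting `Event` lets Actor code register platform event listeners without importing from `crawlee` directly. A brief sketch, assuming the usual listener-registration pattern (the handler body is illustrative):

```python
from apify import Actor
from apify.events import Event


async def main() -> None:
    async with Actor:
        async def on_persist_state(event_data: object) -> None:
            # Persist any in-progress state so a migrated run can resume.
            Actor.log.info('Persist state event received: %s', event_data)

        Actor.on(Event.PERSIST_STATE, on_persist_state)
```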
60 changes: 38 additions & 22 deletions src/apify/storage_clients/_apify/_dataset_client.py
@@ -12,6 +12,8 @@
from crawlee.storage_clients._base import DatasetClient
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata

from ._utils import resolve_alias_to_id, store_alias_mapping

if TYPE_CHECKING:
from collections.abc import AsyncIterator

@@ -66,6 +68,7 @@ async def open(
*,
id: str | None,
name: str | None,
alias: str | None,
configuration: Configuration,
) -> ApifyDatasetClient:
"""Open an Apify dataset client.
@@ -74,22 +77,27 @@
It handles authentication, storage lookup/creation, and metadata retrieval.

Args:
id: The ID of an existing dataset to open. If provided, the client will connect to this specific storage.
Cannot be used together with `name`.
name: The name of a dataset to get or create. If a storage with this name exists, it will be opened;
otherwise, a new one will be created. Cannot be used together with `id`.
id: The ID of the dataset to open. If provided, searches for an existing dataset by ID.
Mutually exclusive with `name` and `alias`.
name: The name of the dataset to open (global scope; persists across runs).
Mutually exclusive with `id` and `alias`.
alias: The alias of the dataset to open (run scope; creates an unnamed storage).
Mutually exclusive with `id` and `name`.
configuration: The configuration object containing API credentials and settings. Must include a valid
`token` and `api_base_url`. May also contain a `default_dataset_id` for fallback when neither
`id` nor `name` is provided.
`id`, `name`, nor `alias` is provided.

Returns:
An instance for the opened or created storage client.

Raises:
ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
are provided, or if neither `id` nor `name` is provided and no default storage ID is available in
the configuration.
ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
`id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
in the configuration.
"""
if sum(1 for param in [id, name, alias] if param is not None) > 1:
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

token = configuration.token
if not token:
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -115,27 +123,35 @@ async def open(
)
apify_datasets_client = apify_client_async.datasets()

# If both id and name are provided, raise an error.
if id and name:
raise ValueError('Only one of "id" or "name" can be specified, not both.')
# Normalize 'default' alias to None
alias = None if alias == 'default' else alias

# If id is provided, get the storage by ID.
if id and name is None:
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
# Handle alias resolution
if alias:
# Try to resolve alias to existing storage ID
resolved_id = await resolve_alias_to_id(alias, 'dataset', configuration)
if resolved_id:
id = resolved_id
else:
# Create a new storage and store the alias mapping
new_storage_metadata = DatasetMetadata.model_validate(
await apify_datasets_client.get_or_create(),
)
id = new_storage_metadata.id
await store_alias_mapping(alias, 'dataset', id, configuration)

# If name is provided, get or create the storage by name.
if name and id is None:
elif name:
id = DatasetMetadata.model_validate(
await apify_datasets_client.get_or_create(name=name),
).id
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

# If both id and name are None, try to get the default storage ID from environment variables.
# The default storage ID environment variable is set by the Apify platform. It also contains
# a new storage ID after Actor's reboot or migration.
if id is None and name is None:
# If none are provided, try to get the default storage ID from environment variables.
elif id is None:
id = configuration.default_dataset_id
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

# Now create the client for the determined ID
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

# Fetch its metadata.
metadata = await apify_dataset_client.get()
@@ -150,7 +166,7 @@ async def open(
# Verify that the storage exists by fetching its metadata again.
metadata = await apify_dataset_client.get()
if metadata is None:
raise ValueError(f'Opening dataset with id={id} and name={name} failed.')
raise ValueError(f'Opening dataset with id={id}, name={name}, and alias={alias} failed.')

return cls(
api_client=apify_dataset_client,
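Seen from the caller's side, the alias-resolution branch above means repeated opens with the same alias land on the same storage. The following is a behavioral sketch inferred from the mapping logic in this diff, not code from the PR; the alias value is made up.

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        # First open under this run: no mapping for the alias exists yet, so an
        # unnamed dataset is created and the alias -> ID mapping is persisted.
        first = await Actor.open_dataset(alias='scraped-pages')

        # A later open with the same alias resolves back to the same storage ID
        # instead of creating another dataset.
        second = await Actor.open_dataset(alias='scraped-pages')
        assert first.id == second.id
```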
59 changes: 37 additions & 22 deletions src/apify/storage_clients/_apify/_key_value_store_client.py
@@ -12,6 +12,7 @@
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata

from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
from ._utils import resolve_alias_to_id, store_alias_mapping
from apify._crypto import create_hmac_signature

if TYPE_CHECKING:
@@ -58,6 +59,7 @@ async def open(
*,
id: str | None,
name: str | None,
alias: str | None,
configuration: Configuration,
) -> ApifyKeyValueStoreClient:
"""Open an Apify key-value store client.
@@ -66,22 +68,27 @@
It handles authentication, storage lookup/creation, and metadata retrieval.

Args:
id: The ID of an existing key-value store to open. If provided, the client will connect to this specific
storage. Cannot be used together with `name`.
name: The name of a key-value store to get or create. If a storage with this name exists, it will be
opened; otherwise, a new one will be created. Cannot be used together with `id`.
id: The ID of the key-value store to open. If provided, searches for an existing key-value store by ID.
Mutually exclusive with `name` and `alias`.
name: The name of the key-value store to open (global scope; persists across runs).
Mutually exclusive with `id` and `alias`.
alias: The alias of the key-value store to open (run scope; creates an unnamed storage).
Mutually exclusive with `id` and `name`.
configuration: The configuration object containing API credentials and settings. Must include a valid
`token` and `api_base_url`. May also contain a `default_key_value_store_id` for fallback when
neither `id` nor `name` is provided.
neither `id`, `name`, nor `alias` is provided.

Returns:
An instance for the opened or created storage client.

Raises:
ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
are provided, or if neither `id` nor `name` is provided and no default storage ID is available
ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
`id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
in the configuration.
"""
if sum(1 for param in [id, name, alias] if param is not None) > 1:
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

token = configuration.token
if not token:
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -107,27 +114,35 @@ async def open(
)
apify_kvss_client = apify_client_async.key_value_stores()

# If both id and name are provided, raise an error.
if id and name:
raise ValueError('Only one of "id" or "name" can be specified, not both.')

# If id is provided, get the storage by ID.
if id and name is None:
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
# Normalize 'default' alias to None
alias = None if alias == 'default' else alias

# Handle alias resolution
if alias:
# Try to resolve alias to existing storage ID
resolved_id = await resolve_alias_to_id(alias, 'kvs', configuration)
if resolved_id:
id = resolved_id
else:
# Create a new storage and store the alias mapping
new_storage_metadata = ApifyKeyValueStoreMetadata.model_validate(
await apify_kvss_client.get_or_create(),
)
id = new_storage_metadata.id
await store_alias_mapping(alias, 'kvs', id, configuration)

# If name is provided, get or create the storage by name.
if name and id is None:
elif name:
id = ApifyKeyValueStoreMetadata.model_validate(
await apify_kvss_client.get_or_create(name=name),
).id
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

# If both id and name are None, try to get the default storage ID from environment variables.
# The default storage ID environment variable is set by the Apify platform. It also contains
# a new storage ID after Actor's reboot or migration.
if id is None and name is None:
# If none are provided, try to get the default storage ID from environment variables.
elif id is None:
id = configuration.default_key_value_store_id
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

# Now create the client for the determined ID
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

# Fetch its metadata.
metadata = await apify_kvs_client.get()
@@ -142,7 +157,7 @@ async def open(
# Verify that the storage exists by fetching its metadata again.
metadata = await apify_kvs_client.get()
if metadata is None:
raise ValueError(f'Opening key-value store with id={id} and name={name} failed.')
raise ValueError(f'Opening key-value store with id={id}, name={name}, and alias={alias} failed.')

return cls(
api_client=apify_kvs_client,
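One detail worth noting in the branch above is that the `'default'` alias is normalized to `None`, so it falls through to the run's default storage instead of creating a new unnamed one. A hedged sketch of that behavior, assuming the normalization is observable through the public `Actor` API:

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        # 'default' is normalized to None, so this should open the run's default
        # key-value store rather than create a new unnamed one.
        kvs = await Actor.open_key_value_store(alias='default')
        await kvs.set_value('last-run-status', {'ok': True})
```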