Skip to content

Commit 8721ef5

Browse files
authored
feat: Add support for NDU storages (#594)
### Description - Implement support for NDUs (non-default unnamed storages) for Apify storage client. ### Issues - Closes: apify/crawlee-python#1175 ### Testing - New integration tests were implemented. ### Checklist - [x] CI passed ### Manual testing Actor ```python import asyncio from apify import Actor async def main() -> None: async with Actor: cnt = await Actor.get_value('cnt', 0) cnt += 1 Actor.log.info('Actor is running for the %d time', cnt) env_dict = Actor.get_env() env_dict = { 'id': env_dict['id'], 'build_id': env_dict['build_id'], 'default_dataset_id': env_dict['default_dataset_id'], 'default_key_value_store_id': env_dict['default_key_value_store_id'], 'default_request_queue_id': env_dict['default_request_queue_id'], } Actor.log.info(f'Environment variables: {env_dict}') dataset_default = await Actor.open_dataset(force_cloud=True) dataset_alias = await Actor.open_dataset(force_cloud=True, alias='my-alias-dataset') dataset_alias_2 = await Actor.open_dataset(force_cloud=True, alias='my-alias-dataset-2') dataset_named = await Actor.open_dataset(force_cloud=True, name='my-named-dataset') Actor.log.info(f'dataset default ID: {dataset_default.id}') Actor.log.info(f'dataset alias ID: {dataset_alias.id}') Actor.log.info(f'dataset alias 2 ID: {dataset_alias_2.id}') Actor.log.info(f'dataset named ID: {dataset_named.id}') await dataset_default.push_data({'data': 'default'}) await dataset_alias.push_data({'data': 'alias'}) await dataset_alias_2.push_data({'data': 'alias 2'}) await dataset_named.push_data({'data': 'named'}) await asyncio.sleep(3) dataset_items_default = await dataset_default.list_items() dataset_items_alias = await dataset_alias.list_items() dataset_items_alias_2 = await dataset_alias_2.list_items() dataset_items_named = await dataset_named.list_items() Actor.log.info(f'Default dataset items: {dataset_items_default}') Actor.log.info(f'Alias dataset items: {dataset_items_alias}') Actor.log.info(f'Alias 2 dataset items: {dataset_items_alias_2}') 
Actor.log.info(f'Named dataset items: {dataset_items_named}') if cnt < 3: await Actor.set_value('cnt', cnt) await Actor.reboot() Actor.log.info('Actor is finishing...') await asyncio.sleep(3) env_dict = Actor.get_env() env_dict = { 'id': env_dict['id'], 'build_id': env_dict['build_id'], 'default_dataset_id': env_dict['default_dataset_id'], 'default_key_value_store_id': env_dict['default_key_value_store_id'], 'default_request_queue_id': env_dict['default_request_queue_id'], } Actor.log.info(f'Environment variables: {env_dict}') if __name__ == '__main__': asyncio.run(main()) ``` Log: ``` 2025-09-16T08:15:29.454Z ACTOR: Pulling Docker image of build Cs6vcRruiN3XWMBde from registry. 2025-09-16T08:15:31.429Z ACTOR: Creating Docker container. 2025-09-16T08:15:31.614Z ACTOR: Starting Docker container. 2025-09-16T08:15:32.780Z Actor is running on the Apify platform, `disable_browser_sandbox` was changed to True. 2025-09-16T08:15:32.783Z [apify] INFO Initializing Actor... 2025-09-16T08:15:32.788Z [apify] INFO System info ({"apify_sdk_version": "2.7.1", "apify_client_version": "2.1.0", "crawlee_version": "0.6.13b37", "python_version": "3.13.7", "os": "linux"}) 2025-09-16T08:15:32.919Z [apify] INFO Actor is running for the 1 time 2025-09-16T08:15:32.921Z [apify] INFO Environment variables: {'id': 'yFiEdI2cQnAwgWuWL', 'build_id': 'Cs6vcRruiN3XWMBde', 'default_dataset_id': 'dzFyI0aGwQGby34fi', 'default_key_value_store_id': '2IMIBuOc6j7OJnhf0', 'default_request_queue_id': 'e498h6IN2aTatWSoN'} 2025-09-16T08:15:33.509Z [apify] INFO dataset default ID: dzFyI0aGwQGby34fi 2025-09-16T08:15:33.511Z [apify] INFO dataset alias ID: f7fgsLCbw2wsQ46pa 2025-09-16T08:15:33.512Z [apify] INFO dataset alias 2 ID: tee4ve0yVg8VkTf5U 2025-09-16T08:15:33.514Z [apify] INFO dataset named ID: 5derRGi9fgpeknbaH 2025-09-16T08:15:37.086Z [apify] INFO Default dataset items: [{'data': 'default'}] 2025-09-16T08:15:37.087Z [apify] INFO Alias dataset items: [{'data': 'alias'}] 2025-09-16T08:15:37.089Z [apify] 
INFO Alias 2 dataset items: [{'data': 'alias 2'}] 2025-09-16T08:15:37.091Z [apify] INFO Named dataset items: [{'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, ... [line-too-long] 2025-09-16T08:15:37.190Z ACTOR: Actor run will reboot. 2025-09-16T08:15:37.192Z ACTOR: Sending Docker container SIGTERM signal. 2025-09-16T08:15:37.221Z ACTOR: Run was rebooted. 2025-09-16T08:15:37.222Z ACTOR: Pulling Docker image of build Cs6vcRruiN3XWMBde from registry. 2025-09-16T08:15:37.224Z ACTOR: Creating Docker container. 2025-09-16T08:15:37.368Z ACTOR: Starting Docker container. 2025-09-16T08:15:38.375Z Actor is running on the Apify platform, `disable_browser_sandbox` was changed to True. 2025-09-16T08:15:38.377Z [apify] INFO Initializing Actor... 
2025-09-16T08:15:38.380Z [apify] INFO System info ({"apify_sdk_version": "2.7.1", "apify_client_version": "2.1.0", "crawlee_version": "0.6.13b37", "python_version": "3.13.7", "os": "linux"}) 2025-09-16T08:15:38.504Z [apify] INFO Actor is running for the 2 time 2025-09-16T08:15:38.506Z [apify] INFO Environment variables: {'id': 'yFiEdI2cQnAwgWuWL', 'build_id': 'Cs6vcRruiN3XWMBde', 'default_dataset_id': 'dzFyI0aGwQGby34fi', 'default_key_value_store_id': '2IMIBuOc6j7OJnhf0', 'default_request_queue_id': 'e498h6IN2aTatWSoN'} 2025-09-16T08:15:39.152Z [apify] INFO dataset default ID: dzFyI0aGwQGby34fi 2025-09-16T08:15:39.154Z [apify] INFO dataset alias ID: f7fgsLCbw2wsQ46pa 2025-09-16T08:15:39.156Z [apify] INFO dataset alias 2 ID: tee4ve0yVg8VkTf5U 2025-09-16T08:15:39.158Z [apify] INFO dataset named ID: 5derRGi9fgpeknbaH 2025-09-16T08:15:42.680Z [apify] INFO Default dataset items: [{'data': 'default'}, {'data': 'default'}] 2025-09-16T08:15:42.682Z [apify] INFO Alias dataset items: [{'data': 'alias'}, {'data': 'alias'}] 2025-09-16T08:15:42.684Z [apify] INFO Alias 2 dataset items: [{'data': 'alias 2'}, {'data': 'alias 2'}] 2025-09-16T08:15:42.686Z [apify] INFO Named dataset items: [{'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 
'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, ... [line-too-long] 2025-09-16T08:15:42.788Z ACTOR: Actor run will reboot. 2025-09-16T08:15:42.790Z ACTOR: Sending Docker container SIGTERM signal. 2025-09-16T08:15:42.811Z ACTOR: Run was rebooted. 2025-09-16T08:15:42.813Z ACTOR: Pulling Docker image of build Cs6vcRruiN3XWMBde from registry. 2025-09-16T08:15:42.815Z ACTOR: Creating Docker container. 2025-09-16T08:15:42.890Z ACTOR: Starting Docker container. 2025-09-16T08:15:44.101Z Actor is running on the Apify platform, `disable_browser_sandbox` was changed to True. 2025-09-16T08:15:44.108Z [apify] INFO Initializing Actor... 2025-09-16T08:15:44.110Z [apify] INFO System info ({"apify_sdk_version": "2.7.1", "apify_client_version": "2.1.0", "crawlee_version": "0.6.13b37", "python_version": "3.13.7", "os": "linux"}) 2025-09-16T08:15:44.212Z [apify] INFO Actor is running for the 3 time 2025-09-16T08:15:44.214Z [apify] INFO Environment variables: {'id': 'yFiEdI2cQnAwgWuWL', 'build_id': 'Cs6vcRruiN3XWMBde', 'default_dataset_id': 'dzFyI0aGwQGby34fi', 'default_key_value_store_id': '2IMIBuOc6j7OJnhf0', 'default_request_queue_id': 'e498h6IN2aTatWSoN'} 2025-09-16T08:15:44.535Z [apify] INFO dataset default ID: dzFyI0aGwQGby34fi 2025-09-16T08:15:44.537Z [apify] INFO dataset alias ID: f7fgsLCbw2wsQ46pa 2025-09-16T08:15:44.539Z [apify] INFO dataset alias 2 ID: tee4ve0yVg8VkTf5U 2025-09-16T08:15:44.541Z [apify] INFO dataset named ID: 5derRGi9fgpeknbaH 2025-09-16T08:15:48.067Z [apify] INFO Default dataset items: [{'data': 'default'}, {'data': 'default'}, {'data': 'default'}] 2025-09-16T08:15:48.069Z [apify] INFO Alias dataset items: [{'data': 'alias'}, {'data': 'alias'}, {'data': 'alias'}] 2025-09-16T08:15:48.071Z [apify] INFO Alias 2 dataset items: [{'data': 'alias 2'}, {'data': 'alias 2'}, {'data': 'alias 2'}] 2025-09-16T08:15:48.073Z [apify] INFO Named dataset items: [{'data': 'named'}, 
{'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, {'data': 'named'}, ... [line-too-long] 2025-09-16T08:15:48.075Z [apify] INFO Actor is finishing... 2025-09-16T08:15:51.068Z [apify] INFO Environment variables: {'id': 'yFiEdI2cQnAwgWuWL', 'build_id': 'Cs6vcRruiN3XWMBde', 'default_dataset_id': 'dzFyI0aGwQGby34fi', 'default_key_value_store_id': '2IMIBuOc6j7OJnhf0', 'default_request_queue_id': 'e498h6IN2aTatWSoN'} 2025-09-16T08:15:51.070Z [apify] INFO Exiting Actor ({"exit_code": 0}) ``` Default KVS content under `__STORAGE_ALIASES_MAPPING` key: ``` { "alias-dataset-my-alias-dataset": "f7fgsLCbw2wsQ46pa", "alias-dataset-my-alias-dataset-2": "tee4ve0yVg8VkTf5U" } ```
1 parent 6bcbd28 commit 8721ef5

File tree

15 files changed

+577
-132
lines changed

15 files changed

+577
-132
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ keywords = [
3636
dependencies = [
3737
"apify-client>=2.0.0,<3.0.0",
3838
"apify-shared>=2.0.0,<3.0.0",
39-
"crawlee==1.0.0rc1",
39+
"crawlee==0.6.13b37",
4040
"cachetools>=5.5.0",
4141
"cryptography>=42.0.0",
4242
"impit>=0.5.3",

src/apify/_actor.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ async def open_dataset(
401401
self,
402402
*,
403403
id: str | None = None,
404+
alias: str | None = None,
404405
name: str | None = None,
405406
force_cloud: bool = False,
406407
) -> Dataset:
@@ -411,10 +412,12 @@ async def open_dataset(
411412
the Apify cloud.
412413
413414
Args:
414-
id: ID of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
415-
the default dataset associated with the Actor run.
416-
name: Name of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
417-
the default dataset associated with the Actor run.
415+
id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
416+
Mutually exclusive with name and alias.
417+
name: The name of the dataset to open (global scope, persists across runs).
418+
Mutually exclusive with id and alias.
419+
alias: The alias of the dataset to open (run scope, creates unnamed storage).
420+
Mutually exclusive with id and name.
418421
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
419422
to combine local and cloud storage.
420423
@@ -428,6 +431,7 @@ async def open_dataset(
428431

429432
return await Dataset.open(
430433
id=id,
434+
alias=alias,
431435
name=name,
432436
configuration=self._configuration,
433437
storage_client=storage_client,
@@ -437,6 +441,7 @@ async def open_key_value_store(
437441
self,
438442
*,
439443
id: str | None = None,
444+
alias: str | None = None,
440445
name: str | None = None,
441446
force_cloud: bool = False,
442447
) -> KeyValueStore:
@@ -446,10 +451,12 @@ async def open_key_value_store(
446451
and retrieved using a unique key. The actual data is stored either on a local filesystem or in the Apify cloud.
447452
448453
Args:
449-
id: ID of the key-value store to be opened. If neither `id` nor `name` are provided, the method returns
450-
the default key-value store associated with the Actor run.
451-
name: Name of the key-value store to be opened. If neither `id` nor `name` are provided, the method
452-
returns the default key-value store associated with the Actor run.
454+
id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
455+
Mutually exclusive with name and alias.
456+
name: The name of the KVS to open (global scope, persists across runs).
457+
Mutually exclusive with id and alias.
458+
alias: The alias of the KVS to open (run scope, creates unnamed storage).
459+
Mutually exclusive with id and name.
453460
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
454461
to combine local and cloud storage.
455462
@@ -462,6 +469,7 @@ async def open_key_value_store(
462469

463470
return await KeyValueStore.open(
464471
id=id,
472+
alias=alias,
465473
name=name,
466474
configuration=self._configuration,
467475
storage_client=storage_client,
@@ -471,6 +479,7 @@ async def open_request_queue(
471479
self,
472480
*,
473481
id: str | None = None,
482+
alias: str | None = None,
474483
name: str | None = None,
475484
force_cloud: bool = False,
476485
) -> RequestQueue:
@@ -482,10 +491,12 @@ async def open_request_queue(
482491
crawling orders.
483492
484493
Args:
485-
id: ID of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
486-
the default request queue associated with the Actor run.
487-
name: Name of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
488-
the default request queue associated with the Actor run.
494+
id: The ID of the RQ to open. If provided, searches for existing RQ by ID.
495+
Mutually exclusive with name and alias.
496+
name: The name of the RQ to open (global scope, persists across runs).
497+
Mutually exclusive with id and alias.
498+
alias: The alias of the RQ to open (run scope, creates unnamed storage).
499+
Mutually exclusive with id and name.
489500
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
490501
to combine local and cloud storage.
491502
@@ -499,6 +510,7 @@ async def open_request_queue(
499510

500511
return await RequestQueue.open(
501512
id=id,
513+
alias=alias,
502514
name=name,
503515
configuration=self._configuration,
504516
storage_client=storage_client,

src/apify/events/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from crawlee.events import EventManager, LocalEventManager
1+
from crawlee.events import Event, EventManager, LocalEventManager
22

33
from ._apify_event_manager import ApifyEventManager
44

5-
__all__ = ['ApifyEventManager', 'EventManager', 'LocalEventManager']
5+
__all__ = ['ApifyEventManager', 'Event', 'EventManager', 'LocalEventManager']

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
from crawlee.storage_clients._base import DatasetClient
1313
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
1414

15+
from ._utils import resolve_alias_to_id, store_alias_mapping
16+
1517
if TYPE_CHECKING:
1618
from collections.abc import AsyncIterator
1719

@@ -66,6 +68,7 @@ async def open(
6668
*,
6769
id: str | None,
6870
name: str | None,
71+
alias: str | None,
6972
configuration: Configuration,
7073
) -> ApifyDatasetClient:
7174
"""Open an Apify dataset client.
@@ -74,22 +77,27 @@ async def open(
7477
It handles authentication, storage lookup/creation, and metadata retrieval.
7578
7679
Args:
77-
id: The ID of an existing dataset to open. If provided, the client will connect to this specific storage.
78-
Cannot be used together with `name`.
79-
name: The name of a dataset to get or create. If a storage with this name exists, it will be opened;
80-
otherwise, a new one will be created. Cannot be used together with `id`.
80+
id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
81+
Mutually exclusive with name and alias.
82+
name: The name of the dataset to open (global scope, persists across runs).
83+
Mutually exclusive with id and alias.
84+
alias: The alias of the dataset to open (run scope, creates unnamed storage).
85+
Mutually exclusive with id and name.
8186
configuration: The configuration object containing API credentials and settings. Must include a valid
8287
`token` and `api_base_url`. May also contain a `default_dataset_id` for fallback when neither
83-
`id` nor `name` is provided.
88+
`id`, `name`, nor `alias` is provided.
8489
8590
Returns:
8691
An instance for the opened or created storage client.
8792
8893
Raises:
89-
ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
90-
are provided, or if neither `id` nor `name` is provided and no default storage ID is available in
91-
the configuration.
94+
ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
95+
`id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
96+
in the configuration.
9297
"""
98+
if sum(1 for param in [id, name, alias] if param is not None) > 1:
99+
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
100+
93101
token = configuration.token
94102
if not token:
95103
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -115,27 +123,35 @@ async def open(
115123
)
116124
apify_datasets_client = apify_client_async.datasets()
117125

118-
# If both id and name are provided, raise an error.
119-
if id and name:
120-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
126+
# Normalize 'default' alias to None
127+
alias = None if alias == 'default' else alias
121128

122-
# If id is provided, get the storage by ID.
123-
if id and name is None:
124-
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
129+
# Handle alias resolution
130+
if alias:
131+
# Try to resolve alias to existing storage ID
132+
resolved_id = await resolve_alias_to_id(alias, 'dataset', configuration)
133+
if resolved_id:
134+
id = resolved_id
135+
else:
136+
# Create a new storage and store the alias mapping
137+
new_storage_metadata = DatasetMetadata.model_validate(
138+
await apify_datasets_client.get_or_create(),
139+
)
140+
id = new_storage_metadata.id
141+
await store_alias_mapping(alias, 'dataset', id, configuration)
125142

126143
# If name is provided, get or create the storage by name.
127-
if name and id is None:
144+
elif name:
128145
id = DatasetMetadata.model_validate(
129146
await apify_datasets_client.get_or_create(name=name),
130147
).id
131-
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
132148

133-
# If both id and name are None, try to get the default storage ID from environment variables.
134-
# The default storage ID environment variable is set by the Apify platform. It also contains
135-
# a new storage ID after Actor's reboot or migration.
136-
if id is None and name is None:
149+
# If none are provided, try to get the default storage ID from environment variables.
150+
elif id is None:
137151
id = configuration.default_dataset_id
138-
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
152+
153+
# Now create the client for the determined ID
154+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
139155

140156
# Fetch its metadata.
141157
metadata = await apify_dataset_client.get()
@@ -150,7 +166,7 @@ async def open(
150166
# Verify that the storage exists by fetching its metadata again.
151167
metadata = await apify_dataset_client.get()
152168
if metadata is None:
153-
raise ValueError(f'Opening dataset with id={id} and name={name} failed.')
169+
raise ValueError(f'Opening dataset with id={id}, name={name}, and alias={alias} failed.')
154170

155171
return cls(
156172
api_client=apify_dataset_client,

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
1313

1414
from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
15+
from ._utils import resolve_alias_to_id, store_alias_mapping
1516
from apify._crypto import create_hmac_signature
1617

1718
if TYPE_CHECKING:
@@ -58,6 +59,7 @@ async def open(
5859
*,
5960
id: str | None,
6061
name: str | None,
62+
alias: str | None,
6163
configuration: Configuration,
6264
) -> ApifyKeyValueStoreClient:
6365
"""Open an Apify key-value store client.
@@ -66,22 +68,27 @@ async def open(
6668
It handles authentication, storage lookup/creation, and metadata retrieval.
6769
6870
Args:
69-
id: The ID of an existing key-value store to open. If provided, the client will connect to this specific
70-
storage. Cannot be used together with `name`.
71-
name: The name of a key-value store to get or create. If a storage with this name exists, it will be
72-
opened; otherwise, a new one will be created. Cannot be used together with `id`.
71+
id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
72+
Mutually exclusive with name and alias.
73+
name: The name of the KVS to open (global scope, persists across runs).
74+
Mutually exclusive with id and alias.
75+
alias: The alias of the KVS to open (run scope, creates unnamed storage).
76+
Mutually exclusive with id and name.
7377
configuration: The configuration object containing API credentials and settings. Must include a valid
7478
`token` and `api_base_url`. May also contain a `default_key_value_store_id` for fallback when
75-
neither `id` nor `name` is provided.
79+
neither `id`, `name`, nor `alias` is provided.
7680
7781
Returns:
7882
An instance for the opened or created storage client.
7983
8084
Raises:
81-
ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
82-
are provided, or if neither `id` nor `name` is provided and no default storage ID is available
85+
ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
86+
`id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
8387
in the configuration.
8488
"""
89+
if sum(1 for param in [id, name, alias] if param is not None) > 1:
90+
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
91+
8592
token = configuration.token
8693
if not token:
8794
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -107,27 +114,35 @@ async def open(
107114
)
108115
apify_kvss_client = apify_client_async.key_value_stores()
109116

110-
# If both id and name are provided, raise an error.
111-
if id and name:
112-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
113-
114-
# If id is provided, get the storage by ID.
115-
if id and name is None:
116-
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
117+
# Normalize 'default' alias to None
118+
alias = None if alias == 'default' else alias
119+
120+
# Handle alias resolution
121+
if alias:
122+
# Try to resolve alias to existing storage ID
123+
resolved_id = await resolve_alias_to_id(alias, 'kvs', configuration)
124+
if resolved_id:
125+
id = resolved_id
126+
else:
127+
# Create a new storage and store the alias mapping
128+
new_storage_metadata = ApifyKeyValueStoreMetadata.model_validate(
129+
await apify_kvss_client.get_or_create(),
130+
)
131+
id = new_storage_metadata.id
132+
await store_alias_mapping(alias, 'kvs', id, configuration)
117133

118134
# If name is provided, get or create the storage by name.
119-
if name and id is None:
135+
elif name:
120136
id = ApifyKeyValueStoreMetadata.model_validate(
121137
await apify_kvss_client.get_or_create(name=name),
122138
).id
123-
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
124139

125-
# If both id and name are None, try to get the default storage ID from environment variables.
126-
# The default storage ID environment variable is set by the Apify platform. It also contains
127-
# a new storage ID after Actor's reboot or migration.
128-
if id is None and name is None:
140+
# If none are provided, try to get the default storage ID from environment variables.
141+
elif id is None:
129142
id = configuration.default_key_value_store_id
130-
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
143+
144+
# Now create the client for the determined ID
145+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
131146

132147
# Fetch its metadata.
133148
metadata = await apify_kvs_client.get()
@@ -142,7 +157,7 @@ async def open(
142157
# Verify that the storage exists by fetching its metadata again.
143158
metadata = await apify_kvs_client.get()
144159
if metadata is None:
145-
raise ValueError(f'Opening key-value store with id={id} and name={name} failed.')
160+
raise ValueError(f'Opening key-value store with id={id}, name={name}, and alias={alias} failed.')
146161

147162
return cls(
148163
api_client=apify_kvs_client,

0 commit comments

Comments (0)