Skip to content

Commit c79a3f3

Browse files
committed
Make default Apify storages use alias mechanism
1 parent 4cd6f36 commit c79a3f3

File tree

7 files changed

+60
-14
lines changed

7 files changed

+60
-14
lines changed

docs/04_upgrading/upgrading_to_v3.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ async def main():
4545
## Removed Actor.config property
4646
- `Actor.config` property has been removed. Use `Actor.configuration` instead.
4747

48+
## Default storage IDs in configuration changed to `None`
49+
- `Configuration.default_key_value_store_id` changed from `'default'` to `None`.
50+
- `Configuration.default_dataset_id` changed from `'default'` to `None`.
51+
- `Configuration.default_request_queue_id` changed from `'default'` to `None`.
52+
53+
As a consequence of this change, opening a default storage whose `id` is not specified in `Configuration` will use an unnamed storage.
54+
4855
## Storages
4956

5057
<!-- TODO -->

src/apify/_configuration.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,37 +142,37 @@ class Configuration(CrawleeConfiguration):
142142
] = None
143143

144144
default_dataset_id: Annotated[
145-
str,
145+
str | None,
146146
Field(
147147
validation_alias=AliasChoices(
148148
'actor_default_dataset_id',
149149
'apify_default_dataset_id',
150150
),
151151
description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
152152
),
153-
] = 'default'
153+
] = None
154154

155155
default_key_value_store_id: Annotated[
156-
str,
156+
str | None,
157157
Field(
158158
validation_alias=AliasChoices(
159159
'actor_default_key_value_store_id',
160160
'apify_default_key_value_store_id',
161161
),
162162
description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
163163
),
164-
] = 'default'
164+
] = None
165165

166166
default_request_queue_id: Annotated[
167-
str,
167+
str | None,
168168
Field(
169169
validation_alias=AliasChoices(
170170
'actor_default_request_queue_id',
171171
'apify_default_request_queue_id',
172172
),
173173
description='Default request queue ID for the Apify storage client when no ID or name is provided.',
174174
),
175-
] = 'default'
175+
] = None
176176

177177
disable_outdated_warning: Annotated[
178178
bool,

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,10 @@ async def open(
124124
)
125125
apify_datasets_client = apify_client_async.datasets()
126126

127-
# Normalize 'default' alias to None
128-
alias = None if alias == 'default' else alias
127+
# When no alias, name, or id is given and `configuration.default_dataset_id` is not set, fall back to the unnamed
128+
# storage aliased as `__default__`.
129+
if not any([alias, name, id, configuration.default_dataset_id]):
130+
alias = '__default__'
129131

130132
if alias:
131133
# Check if there is pre-existing alias mapping in the default KVS.
@@ -150,6 +152,11 @@ async def open(
150152
# If none are provided, try to get the default storage ID from environment variables.
151153
elif id is None:
152154
id = configuration.default_dataset_id
155+
if not id:
156+
raise ValueError(
157+
'Dataset "id", "name", or "alias" must be specified, '
158+
'or a default dataset ID must be set in the configuration.'
159+
)
153160

154161
# Now create the client for the determined ID
155162
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,10 @@ async def open(
115115
)
116116
apify_kvss_client = apify_client_async.key_value_stores()
117117

118-
# Normalize 'default' alias to None
119-
alias = None if alias == 'default' else alias
118+
# When no alias, name, or id is given and `configuration.default_key_value_store_id` is not set, fall back to the
119+
# unnamed storage aliased as `__default__`.
120+
if not any([alias, name, id, configuration.default_key_value_store_id]):
121+
alias = '__default__'
120122

121123
if alias:
122124
# Check if there is pre-existing alias mapping in the default KVS.
@@ -142,6 +144,11 @@ async def open(
142144
# If none are provided, try to get the default storage ID from environment variables.
143145
elif id is None:
144146
id = configuration.default_key_value_store_id
147+
if not id:
148+
raise ValueError(
149+
'KeyValueStore "id", "name", or "alias" must be specified, '
150+
'or a default KeyValueStore ID must be set in the configuration.'
151+
)
145152

146153
# Now create the client for the determined ID
147154
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,10 @@ async def open(
200200
)
201201
apify_rqs_client = apify_client_async.request_queues()
202202

203-
# Normalize 'default' alias to None
204-
alias = None if alias == 'default' else alias
203+
# When no alias, name, or id is given and `configuration.default_request_queue_id` is not set, fall back to the
204+
# unnamed storage aliased as `__default__`.
205+
if not any([alias, name, id, configuration.default_request_queue_id]):
206+
alias = '__default__'
205207

206208
if alias:
207209
# Check if there is pre-existing alias mapping in the default KVS.
@@ -226,6 +228,11 @@ async def open(
226228
# If none are provided, try to get the default storage ID from environment variables.
227229
elif id is None:
228230
id = configuration.default_request_queue_id
231+
if not id:
232+
raise ValueError(
233+
'RequestQueue "id", "name", or "alias" must be specified, '
234+
'or a default default_request_queue_id ID must be set in the configuration.'
235+
)
229236

230237
# Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
231238
# It should persist across migrated or resurrected Actor runs on the Apify platform.

src/apify/storage_clients/_apify/_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ async def _get_alias_map(cls) -> dict[str, str]:
7676
Returns:
7777
Map of aliases and storage ids.
7878
"""
79-
if not cls._alias_map:
79+
if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
8080
default_kvs_client = await _get_default_kvs_client()
8181

8282
record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
@@ -156,7 +156,8 @@ async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
156156
min_delay_between_retries_millis=500,
157157
timeout_secs=360,
158158
)
159-
159+
if not configuration.default_key_value_store_id:
160+
raise ValueError("'Configuration.default_key_value_store_id' must be set.")
160161
return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
161162

162163

tests/integration/test_apify_storages.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,20 @@ async def test_alias_concurrent_creation_local(
3232
except AssertionError:
3333
for storage in storages:
3434
await storage.drop()
35+
36+
37+
@pytest.mark.parametrize(
38+
'storage_type',
39+
[Dataset, KeyValueStore, RequestQueue],
40+
)
41+
async def test_unnamed_default_without_config(
42+
storage_type: Dataset | KeyValueStore | RequestQueue, apify_token: str
43+
) -> None:
44+
"""Test that default Apify storage used locally is unnamed storage."""
45+
service_locator.set_configuration(Configuration(token=apify_token))
46+
service_locator.set_storage_client(ApifyStorageClient())
47+
48+
storage = await storage_type.open()
49+
assert storage.name is None
50+
assert storage.id
51+
await storage.drop()

0 commit comments

Comments
 (0)