Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/04_upgrading/upgrading_to_v3.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,21 @@ async def main():
storage_client=custom_storage_client,
)
```

## Removed Actor.config property
- `Actor.config` property has been removed. Use `Actor.configuration` instead.

## Default storage ids in configuration changed to None
- `Configuration.default_key_value_store_id` changed from `'default'` to `None`.
- `Configuration.default_dataset_id` changed from `'default'` to `None`.
- `Configuration.default_request_queue_id` changed from `'default'` to `None`.

Previously using the default storage without specifying its `id` in `Configuration` would lead to using specific storage with id `'default'`. Now it will use newly created unnamed storage with `'id'` assigned by the Apify platform, consecutive calls to get the default storage will return the same storage.

## Storages

<!-- TODO -->

## Storage clients

<!-- TODO -->
12 changes: 6 additions & 6 deletions src/apify/_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,37 +142,37 @@ class Configuration(CrawleeConfiguration):
] = None

default_dataset_id: Annotated[
str,
str | None,
Field(
validation_alias=AliasChoices(
'actor_default_dataset_id',
'apify_default_dataset_id',
),
description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
),
] = 'default'
] = None

default_key_value_store_id: Annotated[
str,
str | None,
Field(
validation_alias=AliasChoices(
'actor_default_key_value_store_id',
'apify_default_key_value_store_id',
),
description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
),
] = 'default'
] = None

default_request_queue_id: Annotated[
str,
str | None,
Field(
validation_alias=AliasChoices(
'actor_default_request_queue_id',
'apify_default_request_queue_id',
),
description='Default request queue ID for the Apify storage client when no ID or name is provided.',
),
] = 'default'
] = None

disable_outdated_warning: Annotated[
bool,
Expand Down
11 changes: 9 additions & 2 deletions src/apify/storage_clients/_apify/_dataset_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,10 @@ async def open(
)
apify_datasets_client = apify_client_async.datasets()

# Normalize 'default' alias to None
alias = None if alias == 'default' else alias
# Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
# storage aliased as `__default__`
if not any([alias, name, id, configuration.default_dataset_id]):
alias = '__default__'

if alias:
# Check if there is pre-existing alias mapping in the default KVS.
Expand All @@ -150,6 +152,11 @@ async def open(
# If none are provided, try to get the default storage ID from environment variables.
elif id is None:
id = configuration.default_dataset_id
if not id:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But... this shouldn't happen thanks to the normalization step that you added, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will not happen, but mypy is not able to figure it out. Probably the code could be refactored to make it clearer to mypy, but I would leave it after the release, as it will be non-breaking pure refactoring, similar to #595 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we just use https://docs.python.org/3/library/typing.html#typing.assert_never? I don't believe that we'll ever come back to change this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is too much runtime checking. The mypy has no clue about max 1 kwargs of three runtime checks and so on. Let's not bend the code too much for the mypy's sake.

Anyway any such refactoring is non-breaking, so no need to rush it in before the release.

raise ValueError(
'Dataset "id", "name", or "alias" must be specified, '
'or a default dataset ID must be set in the configuration.'
)

# Now create the client for the determined ID
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
Expand Down
11 changes: 9 additions & 2 deletions src/apify/storage_clients/_apify/_key_value_store_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,10 @@ async def open(
)
apify_kvss_client = apify_client_async.key_value_stores()

# Normalize 'default' alias to None
alias = None if alias == 'default' else alias
# Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
# unnamed storage aliased as `__default__`
if not any([alias, name, id, configuration.default_key_value_store_id]):
alias = '__default__'

if alias:
# Check if there is pre-existing alias mapping in the default KVS.
Expand All @@ -142,6 +144,11 @@ async def open(
# If none are provided, try to get the default storage ID from environment variables.
elif id is None:
id = configuration.default_key_value_store_id
if not id:
raise ValueError(
'KeyValueStore "id", "name", or "alias" must be specified, '
'or a default KeyValueStore ID must be set in the configuration.'
)

# Now create the client for the determined ID
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
Expand Down
11 changes: 9 additions & 2 deletions src/apify/storage_clients/_apify/_request_queue_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,10 @@ async def open(
)
apify_rqs_client = apify_client_async.request_queues()

# Normalize 'default' alias to None
alias = None if alias == 'default' else alias
# Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
# unnamed storage aliased as `__default__`
if not any([alias, name, id, configuration.default_request_queue_id]):
alias = '__default__'

if alias:
# Check if there is pre-existing alias mapping in the default KVS.
Expand All @@ -226,6 +228,11 @@ async def open(
# If none are provided, try to get the default storage ID from environment variables.
elif id is None:
id = configuration.default_request_queue_id
if not id:
raise ValueError(
'RequestQueue "id", "name", or "alias" must be specified, '
'or a default default_request_queue_id ID must be set in the configuration.'
)

# Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
# It should persist across migrated or resurrected Actor runs on the Apify platform.
Expand Down
5 changes: 3 additions & 2 deletions src/apify/storage_clients/_apify/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ async def _get_alias_map(cls) -> dict[str, str]:
Returns:
Map of aliases and storage ids.
"""
if not cls._alias_map:
if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
default_kvs_client = await _get_default_kvs_client()

record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
Expand Down Expand Up @@ -156,7 +156,8 @@ async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
min_delay_between_retries_millis=500,
timeout_secs=360,
)

if not configuration.default_key_value_store_id:
raise ValueError("'Configuration.default_key_value_store_id' must be set.")
return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)


Expand Down
23 changes: 23 additions & 0 deletions tests/integration/test_apify_storages.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,26 @@ async def test_alias_concurrent_creation_local(
except AssertionError:
for storage in storages:
await storage.drop()


@pytest.mark.parametrize(
'storage_type',
[Dataset, KeyValueStore, RequestQueue],
)
async def test_unnamed_default_without_config(
storage_type: Dataset | KeyValueStore | RequestQueue, apify_token: str
) -> None:
"""Test that default Apify storage used locally is unnamed storage."""
service_locator.set_configuration(Configuration(token=apify_token))
service_locator.set_storage_client(ApifyStorageClient())

# Open storage and make sure it has no name and it has id
storage = await storage_type.open()
assert storage.name is None
assert storage.id

# Make sure the same instance is returned when opened again without name or alias
storage_again = await storage_type.open()
assert storage is storage_again

await storage.drop()
Loading