11from __future__ import annotations
22
3- import logging
4- from asyncio import Lock
53from typing import TYPE_CHECKING
64
75from typing_extensions import override
86
9- from crawlee import service_locator
107from crawlee .storage_clients ._base import StorageClient
118
129from ._dataset_client import ApifyDatasetClient
1310from ._key_value_store_client import ApifyKeyValueStoreClient
1411from ._request_queue_client import ApifyRequestQueueClient
15- from ._utils import _ALIAS_MAPPING_KEY , _Alias
16- from apify ._configuration import Configuration
12+ from ._utils import Alias
1713from apify ._configuration import Configuration as ApifyConfiguration
1814from apify ._utils import docs_group
1915
2723class ApifyStorageClient (StorageClient ):
2824 """Apify storage client."""
2925
30- _alias_storages_initialized = False
31- """Flag that indicates whether the pre-existing alias storages were already initialized."""
32- _alias_init_lock : Lock | None = None
33- """Lock for creating alias storages. Only one alias storage can be created at the time."""
34-
3526 # This class breaches Liskov Substitution Principle. It requires specialized Configuration compared to its parent.
3627 _lsp_violation_error_message_template = (
3728 'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
@@ -42,7 +33,7 @@ def get_additional_cache_key(self, configuration: CrawleeConfiguration) -> Hasha
4233 if isinstance (configuration , ApifyConfiguration ):
4334 if configuration .api_base_url is None or configuration .token is None :
4435 raise ValueError ("'Configuration.api_base_url' and 'Configuration.token' must be set." )
45- return _Alias .get_additional_cache_key (configuration . api_base_url , configuration . token )
36+ return Alias .get_additional_cache_key (configuration )
4637 raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
4738
4839 @override
@@ -56,10 +47,6 @@ async def create_dataset_client(
5647 ) -> ApifyDatasetClient :
5748 configuration = configuration or ApifyConfiguration .get_global_configuration ()
5849 if isinstance (configuration , ApifyConfiguration ):
59- if alias :
60- await self ._initialize_alias_storages ()
61- async with self .get_alias_init_lock ():
62- return await ApifyDatasetClient .open (id = id , name = name , alias = alias , configuration = configuration )
6350 return await ApifyDatasetClient .open (id = id , name = name , alias = alias , configuration = configuration )
6451
6552 raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
@@ -75,12 +62,6 @@ async def create_kvs_client(
7562 ) -> ApifyKeyValueStoreClient :
7663 configuration = configuration or ApifyConfiguration .get_global_configuration ()
7764 if isinstance (configuration , ApifyConfiguration ):
78- if alias :
79- await self ._initialize_alias_storages ()
80- async with self .get_alias_init_lock ():
81- return await ApifyKeyValueStoreClient .open (
82- id = id , name = name , alias = alias , configuration = configuration
83- )
8465 return await ApifyKeyValueStoreClient .open (id = id , name = name , alias = alias , configuration = configuration )
8566
8667 raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
@@ -96,66 +77,6 @@ async def create_rq_client(
9677 ) -> ApifyRequestQueueClient :
9778 configuration = configuration or ApifyConfiguration .get_global_configuration ()
9879 if isinstance (configuration , ApifyConfiguration ):
99- if alias :
100- await self ._initialize_alias_storages ()
101- async with self .get_alias_init_lock ():
102- return await ApifyRequestQueueClient .open (
103- id = id , name = name , alias = alias , configuration = configuration
104- )
10580 return await ApifyRequestQueueClient .open (id = id , name = name , alias = alias , configuration = configuration )
10681
10782 raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
108-
109- @classmethod
110- def get_alias_init_lock (cls ) -> Lock :
111- if not cls ._alias_init_lock :
112- cls ._alias_init_lock = Lock ()
113- return cls ._alias_init_lock
114-
115- @classmethod
116- async def _initialize_alias_storages (cls ) -> None :
117- """Initialize alias storages.
118-
119- This method is called once to populate storage_instance_manager alias related cache. All existing alias
120- storages are saved in storage_instance_manager cache. If the alias storage is not there, it does not exist yet.
121- """
122- if not Configuration .get_global_configuration ().is_at_home :
123- logging .getLogger (__name__ ).warning (
124- 'Alias storage limited retention is only supported on Apify platform. '
125- 'No pre-existing storages are imported.'
126- )
127- cls ._alias_storages_initialized = True
128- return
129-
130- async with cls .get_alias_init_lock ():
131- if cls ._alias_storages_initialized :
132- return
133-
134- cache = service_locator .storage_instance_manager ._cache_by_storage_client [ApifyStorageClient ] # noqa: SLF001
135-
136- default_kvs_client = await _Alias .get_default_kvs_client ()
137-
138- record = await default_kvs_client .get_record (key = _ALIAS_MAPPING_KEY )
139-
140- if record is not None and 'value' in record :
141- # get_record can return {key: ..., value: ..., content_type: ...}
142- alias_export_map = record ['value' ]
143-
144- for export_key , storage_id in alias_export_map .value .items ():
145- exported_alias = _Alias .from_exported_string (export_key )
146-
147- # Re-create custom config used to open the storage
148- custom_config = Configuration ()
149- custom_config .api_base_url = exported_alias .api_url
150- custom_config .token = exported_alias .token
151-
152- # Populate the id cache by opening storage by id
153- storage = await exported_alias .storage_type .open (
154- id = storage_id , configuration = custom_config , storage_client = ApifyStorageClient ()
155- )
156- # Populate the alias cache as well
157- cache .by_alias [exported_alias .storage_type ][exported_alias .alias ][
158- exported_alias .additional_cache_key
159- ] = storage
160-
161- cls ._alias_storages_initialized = True
0 commit comments