1
1
from __future__ import annotations
2
2
3
+ from asyncio import Lock
3
4
from typing import TYPE_CHECKING
4
5
5
6
from typing_extensions import override
6
7
8
+ from crawlee import service_locator
7
9
from crawlee .storage_clients ._base import StorageClient
8
10
9
11
from ._dataset_client import ApifyDatasetClient
10
12
from ._key_value_store_client import ApifyKeyValueStoreClient
11
13
from ._request_queue_client import ApifyRequestQueueClient
14
+ from ._utils import _ALIAS_MAPPING_KEY , _Alias
15
+ from apify ._configuration import Configuration
12
16
from apify ._configuration import Configuration as ApifyConfiguration
13
17
from apify ._utils import docs_group
14
18
22
26
class ApifyStorageClient (StorageClient ):
23
27
"""Apify storage client."""
24
28
29
+ _alias_storages_initialized = False
30
+ """Flag that indicates whether the pre-existing alias storages were already initialized."""
31
+ _alias_init_lock : Lock | None = None
32
+ """Lock for creating alias storages. Only one alias storage can be created at the time."""
33
+
25
34
# This class breaches Liskov Substitution Principle. It requires specialized Configuration compared to its parent.
26
35
_lsp_violation_error_message_template = (
27
36
'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
@@ -30,7 +39,9 @@ class ApifyStorageClient(StorageClient):
30
39
@override
def get_additional_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
    """Return the additional cache key built from the API base URL and token of `configuration`.

    Args:
        configuration: Configuration to derive the key from. It has to be an `ApifyConfiguration` instance.

    Returns:
        The value produced by `_Alias.get_additional_cache_key` for the configured API base URL and token.

    Raises:
        TypeError: If `configuration` is not an `ApifyConfiguration` instance.
        ValueError: If `api_base_url` or `token` of the configuration is `None`.
    """
    # Reject foreign configuration types first, so the rest of the method deals with ApifyConfiguration only.
    if not isinstance(configuration, ApifyConfiguration):
        raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))

    api_base_url = configuration.api_base_url
    token = configuration.token
    if api_base_url is None or token is None:
        raise ValueError("'Configuration.api_base_url' and 'Configuration.token' must be set.")

    return _Alias.get_additional_cache_key(api_base_url, token)
35
46
36
47
@override
@@ -44,6 +55,10 @@ async def create_dataset_client(
44
55
) -> ApifyDatasetClient :
45
56
configuration = configuration or ApifyConfiguration .get_global_configuration ()
46
57
if isinstance (configuration , ApifyConfiguration ):
58
+ if alias :
59
+ await self ._initialize_alias_storages ()
60
+ async with self .get_alias_init_lock ():
61
+ return await ApifyDatasetClient .open (id = id , name = name , alias = alias , configuration = configuration )
47
62
return await ApifyDatasetClient .open (id = id , name = name , alias = alias , configuration = configuration )
48
63
49
64
raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
@@ -59,6 +74,12 @@ async def create_kvs_client(
59
74
) -> ApifyKeyValueStoreClient :
60
75
configuration = configuration or ApifyConfiguration .get_global_configuration ()
61
76
if isinstance (configuration , ApifyConfiguration ):
77
+ if alias :
78
+ await self ._initialize_alias_storages ()
79
+ async with self .get_alias_init_lock ():
80
+ return await ApifyKeyValueStoreClient .open (
81
+ id = id , name = name , alias = alias , configuration = configuration
82
+ )
62
83
return await ApifyKeyValueStoreClient .open (id = id , name = name , alias = alias , configuration = configuration )
63
84
64
85
raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
@@ -74,6 +95,58 @@ async def create_rq_client(
74
95
) -> ApifyRequestQueueClient :
75
96
configuration = configuration or ApifyConfiguration .get_global_configuration ()
76
97
if isinstance (configuration , ApifyConfiguration ):
98
+ if alias :
99
+ await self ._initialize_alias_storages ()
100
+ async with self .get_alias_init_lock ():
101
+ return await ApifyRequestQueueClient .open (
102
+ id = id , name = name , alias = alias , configuration = configuration
103
+ )
77
104
return await ApifyRequestQueueClient .open (id = id , name = name , alias = alias , configuration = configuration )
78
105
79
106
raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
107
+
108
@classmethod
def get_alias_init_lock(cls) -> Lock:
    """Return the lock used when creating alias storages, creating it on first use.

    The lock is kept in the `_alias_init_lock` class attribute, so subsequent calls return the same instance.
    """
    lock = cls._alias_init_lock
    if lock is None:
        # First call: create the lock and remember it on the class.
        lock = cls._alias_init_lock = Lock()
    return lock
113
+
114
@classmethod
async def _initialize_alias_storages(cls) -> None:
    """Initialize alias storages.

    Populates the alias related cache of the storage instance manager. The whole body runs while holding the lock
    returned by `get_alias_init_lock`, and it returns early once `_alias_storages_initialized` has been set, so
    the initialization is performed only once.

    The alias mapping is read from the record stored under `_ALIAS_MAPPING_KEY` in the default key-value store.
    For every exported alias in that mapping the storage is opened by its id and the resulting instance is also
    registered in the alias cache. If the alias storage is not there, it does not exist yet.
    """
    async with cls.get_alias_init_lock():
        if cls._alias_storages_initialized:
            return

        cache = service_locator.storage_instance_manager._cache_by_storage_client[ApifyStorageClient]  # noqa: SLF001

        default_kvs_client = await _Alias.get_default_kvs_client()
        record = await default_kvs_client.get_record(key=_ALIAS_MAPPING_KEY)

        if record is not None and 'value' in record:
            # get_record can return {key: ..., value: ..., content_type: ...}
            stored_value = record['value']
            # NOTE(review): the stored value is presumably the alias mapping itself. A plain dict has no `.value`
            # attribute, so unwrap `.value` only when it is present instead of failing with AttributeError.
            # Confirm against the actual record schema.
            alias_export_map = getattr(stored_value, 'value', stored_value)

            for export_key, storage_id in alias_export_map.items():
                exported_alias = _Alias.from_exported_string(export_key)

                # Re-create custom config used to open the storage
                custom_config = Configuration()
                custom_config.api_base_url = exported_alias.api_url
                custom_config.token = exported_alias.token

                # Populate the id cache by opening storage by id
                storage = await exported_alias.storage_type.open(
                    id=storage_id, configuration=custom_config, storage_client=ApifyStorageClient()
                )
                # Populate the alias cache as well
                cache.by_alias[exported_alias.storage_type][exported_alias.alias][
                    exported_alias.additional_cache_key
                ] = storage

        # Mark as done even when no mapping record exists, so the lookup is not repeated.
        cls._alias_storages_initialized = True
0 commit comments