1
1
from __future__ import annotations
2
2
3
- import logging
4
- from asyncio import Lock
5
3
from typing import TYPE_CHECKING
6
4
7
5
from typing_extensions import override
8
6
9
- from crawlee import service_locator
10
7
from crawlee .storage_clients ._base import StorageClient
11
8
12
9
from ._dataset_client import ApifyDatasetClient
13
10
from ._key_value_store_client import ApifyKeyValueStoreClient
14
11
from ._request_queue_client import ApifyRequestQueueClient
15
- from ._utils import _ALIAS_MAPPING_KEY , _Alias
16
- from apify ._configuration import Configuration
12
+ from ._utils import Alias
17
13
from apify ._configuration import Configuration as ApifyConfiguration
18
14
from apify ._utils import docs_group
19
15
27
23
class ApifyStorageClient (StorageClient ):
28
24
"""Apify storage client."""
29
25
30
- _alias_storages_initialized = False
31
- """Flag that indicates whether the pre-existing alias storages were already initialized."""
32
- _alias_init_lock : Lock | None = None
33
- """Lock for creating alias storages. Only one alias storage can be created at the time."""
34
-
35
26
# This class breaches Liskov Substitution Principle. It requires specialized Configuration compared to its parent.
36
27
_lsp_violation_error_message_template = (
37
28
'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
@@ -42,7 +33,7 @@ def get_additional_cache_key(self, configuration: CrawleeConfiguration) -> Hasha
42
33
if isinstance (configuration , ApifyConfiguration ):
43
34
if configuration .api_base_url is None or configuration .token is None :
44
35
raise ValueError ("'Configuration.api_base_url' and 'Configuration.token' must be set." )
45
- return _Alias .get_additional_cache_key (configuration . api_base_url , configuration . token )
36
+ return Alias .get_additional_cache_key (configuration )
46
37
raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
47
38
48
39
@override
@@ -56,10 +47,6 @@ async def create_dataset_client(
56
47
) -> ApifyDatasetClient :
57
48
configuration = configuration or ApifyConfiguration .get_global_configuration ()
58
49
if isinstance (configuration , ApifyConfiguration ):
59
- if alias :
60
- await self ._initialize_alias_storages ()
61
- async with self .get_alias_init_lock ():
62
- return await ApifyDatasetClient .open (id = id , name = name , alias = alias , configuration = configuration )
63
50
return await ApifyDatasetClient .open (id = id , name = name , alias = alias , configuration = configuration )
64
51
65
52
raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
@@ -75,12 +62,6 @@ async def create_kvs_client(
75
62
) -> ApifyKeyValueStoreClient :
76
63
configuration = configuration or ApifyConfiguration .get_global_configuration ()
77
64
if isinstance (configuration , ApifyConfiguration ):
78
- if alias :
79
- await self ._initialize_alias_storages ()
80
- async with self .get_alias_init_lock ():
81
- return await ApifyKeyValueStoreClient .open (
82
- id = id , name = name , alias = alias , configuration = configuration
83
- )
84
65
return await ApifyKeyValueStoreClient .open (id = id , name = name , alias = alias , configuration = configuration )
85
66
86
67
raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
@@ -96,66 +77,6 @@ async def create_rq_client(
96
77
) -> ApifyRequestQueueClient :
97
78
configuration = configuration or ApifyConfiguration .get_global_configuration ()
98
79
if isinstance (configuration , ApifyConfiguration ):
99
- if alias :
100
- await self ._initialize_alias_storages ()
101
- async with self .get_alias_init_lock ():
102
- return await ApifyRequestQueueClient .open (
103
- id = id , name = name , alias = alias , configuration = configuration
104
- )
105
80
return await ApifyRequestQueueClient .open (id = id , name = name , alias = alias , configuration = configuration )
106
81
107
82
raise TypeError (self ._lsp_violation_error_message_template .format (type (configuration ).__name__ ))
108
-
109
- @classmethod
110
- def get_alias_init_lock (cls ) -> Lock :
111
- if not cls ._alias_init_lock :
112
- cls ._alias_init_lock = Lock ()
113
- return cls ._alias_init_lock
114
-
115
- @classmethod
116
- async def _initialize_alias_storages (cls ) -> None :
117
- """Initialize alias storages.
118
-
119
- This method is called once to populate storage_instance_manager alias related cache. All existing alias
120
- storages are saved in storage_instance_manager cache. If the alias storage is not there, it does not exist yet.
121
- """
122
- if not Configuration .get_global_configuration ().is_at_home :
123
- logging .getLogger (__name__ ).warning (
124
- 'Alias storage limited retention is only supported on Apify platform. '
125
- 'No pre-existing storages are imported.'
126
- )
127
- cls ._alias_storages_initialized = True
128
- return
129
-
130
- async with cls .get_alias_init_lock ():
131
- if cls ._alias_storages_initialized :
132
- return
133
-
134
- cache = service_locator .storage_instance_manager ._cache_by_storage_client [ApifyStorageClient ] # noqa: SLF001
135
-
136
- default_kvs_client = await _Alias .get_default_kvs_client ()
137
-
138
- record = await default_kvs_client .get_record (key = _ALIAS_MAPPING_KEY )
139
-
140
- if record is not None and 'value' in record :
141
- # get_record can return {key: ..., value: ..., content_type: ...}
142
- alias_export_map = record ['value' ]
143
-
144
- for export_key , storage_id in alias_export_map .value .items ():
145
- exported_alias = _Alias .from_exported_string (export_key )
146
-
147
- # Re-create custom config used to open the storage
148
- custom_config = Configuration ()
149
- custom_config .api_base_url = exported_alias .api_url
150
- custom_config .token = exported_alias .token
151
-
152
- # Populate the id cache by opening storage by id
153
- storage = await exported_alias .storage_type .open (
154
- id = storage_id , configuration = custom_config , storage_client = ApifyStorageClient ()
155
- )
156
- # Populate the alias cache as well
157
- cache .by_alias [exported_alias .storage_type ][exported_alias .alias ][
158
- exported_alias .additional_cache_key
159
- ] = storage
160
-
161
- cls ._alias_storages_initialized = True
0 commit comments