6
6
from typing import TYPE_CHECKING , ClassVar
7
7
8
8
from apify_client import ApifyClientAsync
9
+ from crawlee ._utils .crypto import compute_short_hash
9
10
from crawlee .storages import Dataset , KeyValueStore , RequestQueue
10
11
11
12
from apify ._configuration import Configuration
25
26
class Alias :
26
27
"""Class for handling aliases.
27
28
28
- It includes helper methods for serialization/deserialization and initialization from kvs.
29
+ The purpose of this class is to ensure that alias storages are created with the correct id. This is achieved by using
30
+ the default kvs as storage for a global mapping of aliases to storage ids. The same mapping is also kept in memory to avoid
31
+ unnecessary calls to the API and to provide limited support for alias storages when not running on the Apify platform. When on
32
+ the Apify platform, storages created with an alias are accessible by the same alias even after migration or reboot.
29
33
"""
30
34
31
35
_alias_map : ClassVar [dict [str , str ]] = {}
32
36
"""Map containing pre-existing alias storages and their ids. Global for all instances."""
33
37
_alias_init_lock : Lock | None = None
34
38
"""Lock for creating alias storages. Only one alias storage can be created at a time. Global for all instances."""
35
39
36
- ALIAS_STORAGE_KEY_SEPARATOR = '| '
40
+ ALIAS_STORAGE_KEY_SEPARATOR = ', '
37
41
ALIAS_MAPPING_KEY = '__STORAGE_ALIASES_MAPPING'
38
42
39
43
def __init__ (self , storage_type : _StorageT , alias : str , configuration : Configuration ) -> None :
@@ -55,12 +59,24 @@ async def __aexit__(
55
59
56
60
@classmethod
57
61
async def _get_alias_init_lock (cls ) -> Lock :
62
+ """Get lock for controlling the creation of the alias storages.
63
+
64
+ The lock is shared for all instances of the Alias class.
65
+ It is created in an async method to ensure that an event loop is already running.
66
+ """
58
67
if cls ._alias_init_lock is None :
59
68
cls ._alias_init_lock = Lock ()
60
69
return cls ._alias_init_lock
61
70
62
71
@classmethod
63
72
async def get_alias_map (cls ) -> dict [str , str ]:
73
+ """Get the aliases and storage ids mapping from the default kvs.
74
+
75
+ Mapping is loaded from kvs only once and is shared for all instances of the Alias class.
76
+
77
+ Returns:
78
+ Map of aliases and storage ids.
79
+ """
64
80
if not cls ._alias_map :
65
81
default_kvs_client = await get_default_kvs_client ()
66
82
@@ -79,13 +95,17 @@ async def get_alias_map(cls) -> dict[str, str]:
79
95
80
96
@classmethod
81
97
def get_additional_cache_key (cls , configuration : Configuration ) -> str :
82
- """Get additional cache key based on api_url and token."""
83
- if configuration .api_base_url is None or configuration .token is None :
84
- raise ValueError ("'Configuration.api_base_url' and 'Configuration.token' must be set." )
85
- return str ((configuration .api_base_url , configuration .token ))
98
+ """Get additional cache key based on configuration.
99
+
100
+ Only api_public_base_url and token are used, as they are the values relevant for differentiating storages.
101
+ """
102
+ if configuration .api_public_base_url is None or configuration .token is None :
103
+ raise ValueError ("'Configuration.api_public_base_url' and 'Configuration.token' must be set." )
104
+ return compute_short_hash (f'{ configuration .api_public_base_url } { configuration .token } ' .encode ())
86
105
87
106
@property
88
107
def storage_key (self ) -> str :
108
+ """Get a unique storage key used for storing the alias in the mapping."""
89
109
return self .ALIAS_STORAGE_KEY_SEPARATOR .join (
90
110
[
91
111
self .storage_type .__name__ ,
@@ -95,11 +115,19 @@ def storage_key(self) -> str:
95
115
)
96
116
97
117
async def resolve_id (self ) -> str | None :
118
+ """Get id of the aliased storage.
119
+
120
+ Look up the id in the alias mapping. This method does not create a storage; a missing alias yields None.
121
+
122
+ Returns:
123
+ Storage id if it exists, None otherwise.
124
+ """
98
125
return (await self .get_alias_map ()).get (self .storage_key , None )
99
126
100
127
async def store_mapping (self , storage_id : str ) -> None :
101
- """Add alias and related storage id to the mapping in default kvs."""
102
- self ._alias_map [self .storage_key ] = storage_id
128
+ """Add the alias and its storage id to both the local in-memory mapping and the mapping in the default kvs."""
129
+ # Update in-memory mapping
130
+ (await self .get_alias_map ())[self .storage_key ] = storage_id
103
131
if not Configuration .get_global_configuration ().is_at_home :
104
132
logging .getLogger (__name__ ).warning (
105
133
'Alias storage limited retention is only supported on Apify platform. Storage is not exported.'
@@ -117,7 +145,6 @@ async def store_mapping(self, storage_id: str) -> None:
117
145
record = record ['value' ]
118
146
119
147
# Update or create the record with the new alias mapping
120
-
121
148
if isinstance (record , dict ):
122
149
record [self .storage_key ] = storage_id
123
150
else :
@@ -126,7 +153,7 @@ async def store_mapping(self, storage_id: str) -> None:
126
153
# Store the mapping back in the KVS.
127
154
await default_kvs_client .set_record (self .ALIAS_MAPPING_KEY , record )
128
155
except Exception as exc :
129
- logger .warning (f'Error accessing alias mapping for { self .alias } : { exc } ' )
156
+ logger .warning (f'Error storing alias mapping for { self .alias } : { exc } ' )
130
157
131
158
132
159
async def get_default_kvs_client () -> KeyValueStoreClientAsync :
0 commit comments