66from typing import TYPE_CHECKING , ClassVar
77
88from apify_client import ApifyClientAsync
9+ from crawlee ._utils .crypto import compute_short_hash
910from crawlee .storages import Dataset , KeyValueStore , RequestQueue
1011
1112from apify ._configuration import Configuration
2526class Alias :
2627 """Class for handling aliases.
2728
28- It includes helper methods for serialization/deserialization and initialization from kvs.
29+ The purpose of this class is to ensure that alias storages are created with the correct id. This is achieved by using
30+ the default kvs as a storage for a global mapping of aliases to storage ids. The same mapping is also kept in memory to
31+ avoid unnecessary calls to the API and to provide limited support of alias storages when not running on the Apify
32+ platform. On the Apify platform, storages created with an alias stay accessible by that alias even after migration or reboot.
2933 """
3034
3135 _alias_map : ClassVar [dict [str , str ]] = {}
3236 """Map containing pre-existing alias storages and their ids. Global for all instances."""
3337 _alias_init_lock : Lock | None = None
3438 """Lock for creating alias storages. Only one alias storage can be created at a time. Global for all instances."""
3539
36- ALIAS_STORAGE_KEY_SEPARATOR = '| '
40+ ALIAS_STORAGE_KEY_SEPARATOR = ', '
3741 ALIAS_MAPPING_KEY = '__STORAGE_ALIASES_MAPPING'
3842
3943 def __init__ (self , storage_type : _StorageT , alias : str , configuration : Configuration ) -> None :
@@ -55,12 +59,24 @@ async def __aexit__(
5559
5660 @classmethod
5761 async def _get_alias_init_lock (cls ) -> Lock :
62+ """Get lock for controlling the creation of the alias storages.
63+
64+ The lock is shared for all instances of the Alias class.
65+ It is created in an async method to ensure that an event loop is already running.
66+ """
5867 if cls ._alias_init_lock is None :
5968 cls ._alias_init_lock = Lock ()
6069 return cls ._alias_init_lock
6170
6271 @classmethod
6372 async def get_alias_map (cls ) -> dict [str , str ]:
73+ """Get the aliases and storage ids mapping from the default kvs.
74+
75+ Mapping is loaded from kvs only once and is shared for all instances of the Alias class.
76+
77+ Returns:
78+ Map of aliases and storage ids.
79+ """
6480 if not cls ._alias_map :
6581 default_kvs_client = await get_default_kvs_client ()
6682
@@ -79,13 +95,17 @@ async def get_alias_map(cls) -> dict[str, str]:
7995
8096 @classmethod
8197 def get_additional_cache_key (cls , configuration : Configuration ) -> str :
82- """Get additional cache key based on api_url and token."""
83- if configuration .api_base_url is None or configuration .token is None :
84- raise ValueError ("'Configuration.api_base_url' and 'Configuration.token' must be set." )
85- return str ((configuration .api_base_url , configuration .token ))
98+ """Get additional cache key based on configuration.
99+
100+ Only api_public_base_url and token are used, as they are the values relevant for differentiating storages.
101+ """
102+ if configuration .api_public_base_url is None or configuration .token is None :
103+ raise ValueError ("'Configuration.api_public_base_url' and 'Configuration.token' must be set." )
104+ return compute_short_hash (f'{ configuration .api_public_base_url } { configuration .token } ' .encode ())
86105
87106 @property
88107 def storage_key (self ) -> str :
108+ """Get a unique storage key used for storing the alias in the mapping."""
89109 return self .ALIAS_STORAGE_KEY_SEPARATOR .join (
90110 [
91111 self .storage_type .__name__ ,
@@ -95,11 +115,19 @@ def storage_key(self) -> str:
95115 )
96116
97117 async def resolve_id (self ) -> str | None :
118+ """Get id of the aliased storage.
119+
120+ The id is looked up in the alias mapping under this alias's storage key; no storage is created by this method.
121+
122+ Returns:
123+ Storage id if it exists, None otherwise.
124+ """
98125 return (await self .get_alias_map ()).get (self .storage_key , None )
99126
100127 async def store_mapping (self , storage_id : str ) -> None :
101- """Add alias and related storage id to the mapping in default kvs."""
102- self ._alias_map [self .storage_key ] = storage_id
128+ """Add alias and related storage id to the mapping in default kvs and local in-memory mapping."""
129+ # Update in-memory mapping
130+ (await self .get_alias_map ())[self .storage_key ] = storage_id
103131 if not Configuration .get_global_configuration ().is_at_home :
104132 logging .getLogger (__name__ ).warning (
105133 'Alias storage limited retention is only supported on Apify platform. Storage is not exported.'
@@ -117,7 +145,6 @@ async def store_mapping(self, storage_id: str) -> None:
117145 record = record ['value' ]
118146
119147 # Update or create the record with the new alias mapping
120-
121148 if isinstance (record , dict ):
122149 record [self .storage_key ] = storage_id
123150 else :
@@ -126,7 +153,7 @@ async def store_mapping(self, storage_id: str) -> None:
126153 # Store the mapping back in the KVS.
127154 await default_kvs_client .set_record (self .ALIAS_MAPPING_KEY , record )
128155 except Exception as exc :
129- logger .warning (f'Error accessing alias mapping for { self .alias } : { exc } ' )
156+ logger .warning (f'Error storing alias mapping for { self .alias } : { exc } ' )
130157
131158
132159async def get_default_kvs_client () -> KeyValueStoreClientAsync :
0 commit comments