1010import tempfile
1111from collections import deque
1212from concurrent .futures import ThreadPoolExecutor
13+ from dataclasses import dataclass , field
1314from pathlib import Path
14- from typing import Any , Dict , List , Optional , Tuple , Union
15+ from typing import Any , Dict , Final , List , Optional , Tuple , Union
1516
1617import aiobotocore
1718import attr
6162)
6263from .s3wrapper .s3_client import MinioClientWrapper
6364from .settings import Settings
64- from .utils import download_to_file_or_raise
65+ from .utils import download_to_file_or_raise , is_file_entry_valid , to_meta_data_extended
66+
# Time constants in seconds, used to express retry/timeout policies readably
# (e.g. stop_after_delay(1 * _HOUR)).
_MINUTE: Final[int] = 60
_HOUR: Final[int] = 60 * _MINUTE
69+
6570
# Module-level logger named after this module (standard logging convention)
logger = logging.getLogger(__name__)
6772
@@ -96,17 +101,7 @@ async def _cleanup_context(app: web.Application):
96101 app .cleanup_ctx .append (_cleanup_context )
97102
98103
def to_meta_data_extended(row: RowProxy) -> FileMetaDataEx:
    """Build an extended file-metadata object from a ``file_meta_data`` DB row.

    The extension adds ``parent_id``, derived from the object name's parent path.
    """
    assert row  # nosec
    fmd = FileMetaData(**dict(row))  # type: ignore
    return FileMetaDataEx(
        fmd=fmd,
        parent_id=str(Path(fmd.object_name).parent),
    )  # type: ignore
107-
108-
@dataclass
class DatCoreApiToken:
    """Pair of credentials (token + secret) used to access the datcore API.

    Both fields default to ``None`` so an "empty" token can be created and
    filled in later; callers are expected to validate before use.
    """

    api_token: Optional[str] = None
    api_secret: Optional[str] = None

    def to_tuple(self) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(api_token, api_secret)`` for convenient unpacking."""
        return (self.api_token, self.api_secret)
116111
117112
118- @attr . s ( auto_attribs = True )
113+ @dataclass
119114class DataStorageManager : # pylint: disable=too-many-public-methods
120115 """Data storage manager
121116
@@ -154,12 +149,12 @@ class DataStorageManager: # pylint: disable=too-many-public-methods
154149 pool : ThreadPoolExecutor
155150 simcore_bucket_name : str
156151 has_project_db : bool
157- session : AioSession = attr . Factory ( aiobotocore .get_session )
158- datcore_tokens : Dict [str , DatCoreApiToken ] = attr . Factory ( dict )
152+ session : AioSession = field ( default_factory = aiobotocore .get_session )
153+ datcore_tokens : Dict [str , DatCoreApiToken ] = field ( default_factory = dict )
159154 app : Optional [web .Application ] = None
160155
161156 def _create_aiobotocore_client_context (self ) -> ClientCreatorContext :
162- assert hasattr (self .session , "create_client" )
157+ assert hasattr (self .session , "create_client" ) # nosec
163158 # pylint: disable=no-member
164159
165160 # SEE API in https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html
@@ -217,22 +212,28 @@ async def list_files(
217212 accesible_projects_ids = await get_readable_project_ids (
218213 conn , int (user_id )
219214 )
220- has_read_access = (
215+ where_statement = (
221216 file_meta_data .c .user_id == user_id
222217 ) | file_meta_data .c .project_id .in_ (accesible_projects_ids )
223-
224- query = sa .select ([file_meta_data ]).where (has_read_access )
218+ if uuid_filter :
219+ where_statement &= file_meta_data .c .file_uuid .ilike (
220+ f"%{ uuid_filter } %"
221+ )
222+ query = sa .select ([file_meta_data ]).where (where_statement )
225223
226224 async for row in conn .execute (query ):
227225 dex = to_meta_data_extended (row )
228- if dex .fmd .entity_tag is None :
226+ if not is_file_entry_valid (dex .fmd ):
227+ # NOTE: the file is not updated with the information from S3 backend.
228+ # 1. Either the file exists, but was never updated in the database
229+ # 2. Or the file does not exist or was never completed, and the file_meta_data entry is old and faulty
229230 # we need to update from S3 here since the database is not up-to-date
230- dex = await self .update_database_from_storage (
231+ dex = await self .try_update_database_from_storage (
231232 dex .fmd .file_uuid ,
232233 dex .fmd .bucket_name ,
233234 dex .fmd .object_name ,
234235 )
235- if dex and dex . fmd . entity_tag :
236+ if dex :
236237 data .append (dex )
237238
238239 if self .has_project_db :
@@ -288,6 +289,9 @@ async def list_files(
288289
289290 elif location == DATCORE_STR :
290291 api_token , api_secret = self ._get_datcore_tokens (user_id )
292+ assert self .app # nosec
293+ assert api_secret # nosec
294+ assert api_token # nosec
291295 return await datcore_adapter .list_all_datasets_files_metadatas (
292296 self .app , api_token , api_secret
293297 )
@@ -330,6 +334,9 @@ async def list_files_dataset(
330334 elif location == DATCORE_STR :
331335 api_token , api_secret = self ._get_datcore_tokens (user_id )
332336 # lists all the files inside the dataset
337+ assert self .app # nosec
338+ assert api_secret # nosec
339+ assert api_token # nosec
333340 return await datcore_adapter .list_all_files_metadatas_in_dataset (
334341 self .app , api_token , api_secret , dataset_id
335342 )
@@ -368,6 +375,9 @@ async def list_datasets(self, user_id: str, location: str) -> List[DatasetMetaDa
368375
369376 elif location == DATCORE_STR :
370377 api_token , api_secret = self ._get_datcore_tokens (user_id )
378+ assert self .app # nosec
379+ assert api_secret # nosec
380+ assert api_token # nosec
371381 return await datcore_adapter .list_datasets (self .app , api_token , api_secret )
372382
373383 return data
@@ -391,13 +401,14 @@ async def list_file(
391401 if not row :
392402 return None
393403 file_metadata = to_meta_data_extended (row )
394- if file_metadata .fmd .entity_tag is None :
395- # we need to update from S3 here since the database is not up-to-date
396- file_metadata = await self .update_database_from_storage (
397- file_metadata .fmd .file_uuid ,
398- file_metadata .fmd .bucket_name ,
399- file_metadata .fmd .object_name ,
400- )
404+ if is_file_entry_valid (file_metadata .fmd ):
405+ return file_metadata
406+ # we need to update from S3 here since the database is not up-to-date
407+ file_metadata = await self .try_update_database_from_storage (
408+ file_metadata .fmd .file_uuid ,
409+ file_metadata .fmd .bucket_name ,
410+ file_metadata .fmd .object_name ,
411+ )
401412 return file_metadata
402413 # FIXME: returns None in both cases: file does not exist or use has no access
403414 logger .debug ("User %s cannot read file %s" , user_id , file_uuid )
@@ -423,7 +434,7 @@ async def upload_file_to_datcore(
423434 # api_token, api_secret = self._get_datcore_tokens(user_id)
424435 # await dcw.upload_file_to_id(destination_id, local_file_path)
425436
426- async def update_database_from_storage (
437+ async def try_update_database_from_storage (
427438 self ,
428439 file_uuid : str ,
429440 bucket_name : str ,
@@ -469,7 +480,7 @@ async def update_database_from_storage(
469480 return None
470481
471482 @retry (
472- stop = stop_after_delay (3600 ),
483+ stop = stop_after_delay (1 * _HOUR ),
473484 wait = wait_exponential (multiplier = 0.1 , exp_base = 1.2 , max = 30 ),
474485 retry = (
475486 retry_if_exception_type () | retry_if_result (lambda result : result is None )
@@ -479,7 +490,7 @@ async def update_database_from_storage(
479490 async def auto_update_database_from_storage_task (
480491 self , file_uuid : str , bucket_name : str , object_name : str
481492 ):
482- return await self .update_database_from_storage (
493+ return await self .try_update_database_from_storage (
483494 file_uuid , bucket_name , object_name , silence_exception = True
484495 )
485496
@@ -573,6 +584,9 @@ async def download_link_s3(self, file_uuid: str, user_id: int) -> str:
573584
574585 async def download_link_datcore (self , user_id : str , file_id : str ) -> URL :
575586 api_token , api_secret = self ._get_datcore_tokens (user_id )
587+ assert self .app # nosec
588+ assert api_secret # nosec
589+ assert api_token # nosec
576590 return await datcore_adapter .get_file_download_presigned_link (
577591 self .app , api_token , api_secret , file_id
578592 )
@@ -928,6 +942,9 @@ async def delete_file(self, user_id: str, location: str, file_uuid: str):
928942 elif location == DATCORE_STR :
929943 # FIXME: review return inconsistencies
930944 api_token , api_secret = self ._get_datcore_tokens (user_id )
945+ assert self .app # nosec
946+ assert api_secret # nosec
947+ assert api_token # nosec
931948 await datcore_adapter .delete_file (
932949 self .app , api_token , api_secret , file_uuid
933950 )
0 commit comments