diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 616571176371..5187e2345511 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -1312,6 +1312,7 @@ async def create_s3_export( await create_and_upload_export( get_s3_client(self.app), + ProjectRepository.instance(get_db_engine(self.app)), self.simcore_bucket_name, source_object_keys=source_object_keys, destination_object_keys=destination_object_key, diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index 5764151fdf12..d9e36c9f8567 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -8,8 +8,9 @@ from aws_library.s3._constants import STREAM_READER_CHUNK_SIZE from aws_library.s3._models import S3ObjectKey from models_library.api_schemas_storage.storage_schemas import S3BucketName -from models_library.projects import ProjectID +from models_library.projects import ProjectID, ProjectIDStr from models_library.projects_nodes_io import ( + NodeIDStr, SimcoreS3DirectoryID, SimcoreS3FileID, StorageFileID, @@ -27,6 +28,7 @@ from ..models import FileMetaData, FileMetaDataAtDB, GenericCursor, PathMetaData from ..modules.db.access_layer import AccessLayerRepository from ..modules.db.file_meta_data import FileMetaDataRepository, TotalChildren +from ..modules.db.projects import ProjectRepository from .utils import convert_db_to_model @@ -165,17 +167,55 @@ def _base_path_parent(base_path: UserSelectionStr, s3_object: S3ObjectKey) -> st return f"{result}" +def _get_project_ids(user_selecton: set[UserSelectionStr]) -> list[ProjectID]: + results = [] + for selected in user_selecton: + project_id = 
ProjectID(Path(selected).parts[0]) + results.append(project_id) + return results + + +def _replace_node_id_project_id_in_path( + ids_names_map: dict[ProjectID, dict[ProjectIDStr | NodeIDStr, str]], path: str +) -> str: + path_parts = Path(path).parts + if len(path_parts) == 0: + return path + + if len(path_parts) == 1: + return ids_names_map[ProjectID(path)][path].replace("/", "_") + + project_id_str = path_parts[0] + project_id = ProjectID(project_id_str) + node_id_str = path_parts[1] + return "/".join( + ( + ids_names_map[project_id][project_id_str].replace("/", "_"), + ids_names_map[project_id][node_id_str].replace("/", "_"), + *path_parts[2:], + ) + ) + + async def create_and_upload_export( s3_client: SimcoreS3API, + project_repository: ProjectRepository, bucket: S3BucketName, *, source_object_keys: set[tuple[UserSelectionStr, StorageFileID]], destination_object_keys: StorageFileID, progress_bar: ProgressBarData, ) -> None: + ids_names_map = await project_repository.get_project_id_and_node_id_to_names_map( + project_uuids=_get_project_ids(user_selecton={x[0] for x in source_object_keys}) + ) + archive_entries: ArchiveEntries = [ ( - _base_path_parent(selection, s3_object), + _base_path_parent( + _replace_node_id_project_id_in_path(ids_names_map, selection), + _replace_node_id_project_id_in_path(ids_names_map, s3_object), + ), await s3_client.get_bytes_streamer_from_object(bucket, s3_object), ) for (selection, s3_object) in source_object_keys diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 56f7d9bd92af..e87d2a5e0dbd 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -1,10 +1,16 @@ from pathlib import Path +from typing import Final +from uuid import UUID import pytest from aws_library.s3._models import S3ObjectKey +from models_library.projects import ProjectID, ProjectIDStr +from 
# Fixed, easily recognizable identifiers used by the substitution test below.
_PID1: Final[ProjectID] = UUID(int=1)
_PID2: Final[ProjectID] = UUID(int=2)
_NID1: Final[NodeID] = UUID(int=3)
_NID2: Final[NodeID] = UUID(int=4)

# project ID -> {project/node ID as string -> display name}.
# The names of the second project contain "/" to exercise the "/" -> "_"
# replacement.
_IDS_NAMES_MAP: Final[dict[ProjectID, dict[ProjectIDStr | NodeIDStr, str]]] = {
    _PID1: {
        str(_PID1): "project one",
        str(_NID1): "project one -> node one",
        str(_NID2): "project one -> node two",
    },
    _PID2: {
        str(_PID2): "/project/two/",
        str(_NID1): "/project/two/->/node/one/",
        str(_NID2): "/project/two/->/node/two/",
    },
}

# (input path, expected path) pairs: only the first two components are
# expected to be replaced, deeper ones (even node IDs) stay as they are.
_REPLACE_CASES: Final[list[tuple[str, str]]] = [
    ("", ""),
    # names without "/"
    (f"{_PID1}", "project one"),
    (f"{_PID1}/{_NID1}", "project one/project one -> node one"),
    (f"{_PID1}/{_NID1}/something", "project one/project one -> node one/something"),
    (f"{_PID1}/{_NID1}/{_NID2}", f"project one/project one -> node one/{_NID2}"),
    (
        f"{_PID1}/{_NID1}/{_NID2}/something",
        f"project one/project one -> node one/{_NID2}/something",
    ),
    # names containing "/"
    (f"{_PID2}", "_project_two_"),
    (f"{_PID2}/{_NID1}", "_project_two_/_project_two_->_node_one_"),
    (
        f"{_PID2}/{_NID1}/something",
        "_project_two_/_project_two_->_node_one_/something",
    ),
    (
        f"{_PID2}/{_NID1}/{_NID2}",
        f"_project_two_/_project_two_->_node_one_/{_NID2}",
    ),
    (
        f"{_PID2}/{_NID1}/{_NID2}/something",
        f"_project_two_/_project_two_->_node_one_/{_NID2}/something",
    ),
]


@pytest.mark.parametrize("path, expected", _REPLACE_CASES)
def test__replace_node_id_project_id_in_path(path: str, expected: str) -> None:
    """Project and node IDs at the head of ``path`` are replaced by their names."""
    replaced = _replace_node_id_project_id_in_path(_IDS_NAMES_MAP, path)
    assert replaced == expected