diff --git a/services/storage/src/simcore_service_storage/exceptions/errors.py b/services/storage/src/simcore_service_storage/exceptions/errors.py
index 5856a2fec5b7..93dc85ddcd31 100644
--- a/services/storage/src/simcore_service_storage/exceptions/errors.py
+++ b/services/storage/src/simcore_service_storage/exceptions/errors.py
@@ -1,8 +1,7 @@
 from common_library.errors_classes import OsparcErrorMixin
 
 
-class StorageRuntimeError(OsparcErrorMixin, RuntimeError):
-    ...
+class StorageRuntimeError(OsparcErrorMixin, RuntimeError): ...
 
 
 class ConfigurationError(StorageRuntimeError):
@@ -45,3 +44,7 @@ class InvalidFileIdentifierError(AccessLayerError):
 
 class DatCoreCredentialsMissingError(StorageRuntimeError):
     msg_template: str = "DatCore credentials are incomplete. TIP: Check your settings"
+
+
+class SelectionNotAllowedError(StorageRuntimeError):
+    msg_template: str = "Selection='{selection}' must be from the same folder"
diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py
index 671d9d01cee6..616571176371 100644
--- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py
+++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py
@@ -61,6 +61,7 @@
     LinkAlreadyExistsError,
     ProjectAccessRightError,
     ProjectNotFoundError,
+    SelectionNotAllowedError,
 )
 from .models import (
     DatasetMetaData,
@@ -81,9 +82,11 @@
 from .modules.s3 import get_s3_client
 from .utils.s3_utils import S3TransferDataCB
 from .utils.simcore_s3_dsm_utils import (
+    UserSelectionStr,
     compute_file_id_prefix,
     create_and_upload_export,
     create_random_export_name,
+    ensure_user_selection_from_same_base_directory,
     expand_directory,
     get_accessible_project_ids,
     get_directory_file_id,
@@ -1249,7 +1252,11 @@ async def create_s3_export(
         *,
         progress_bar: ProgressBarData,
     ) -> StorageFileID:
-        source_object_keys: set[StorageFileID] = set()
+        source_object_keys: set[tuple[UserSelectionStr, StorageFileID]] = set()
+
+        # ensure all selected items have the same parent
+        if not ensure_user_selection_from_same_base_directory(object_keys):
+            raise SelectionNotAllowedError(selection=object_keys)
 
         # check access rights
         for object_key in object_keys:
@@ -1279,7 +1286,7 @@
                 self.simcore_bucket_name, object_key
             ):
                 for entry in meta_data_files:
-                    source_object_keys.add(entry.object_key)
+                    source_object_keys.add((object_key, entry.object_key))
 
         _logger.debug(
             "User selection '%s' includes '%s' files",
diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py
index 5ebe83c486b1..5764151fdf12 100644
--- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py
+++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py
@@ -1,10 +1,12 @@
 from contextlib import suppress
 from pathlib import Path
+from typing import TypeAlias
 from uuid import uuid4
 
 import orjson
 from aws_library.s3 import S3MetaData, SimcoreS3API
 from aws_library.s3._constants import STREAM_READER_CHUNK_SIZE
+from aws_library.s3._models import S3ObjectKey
 from models_library.api_schemas_storage.storage_schemas import S3BucketName
 from models_library.projects import ProjectID
 from models_library.projects_nodes_io import (
@@ -143,20 +145,40 @@ def create_random_export_name(user_id: UserID) -> StorageFileID:
     )
 
 
+def ensure_user_selection_from_same_base_directory(
+    object_keys: list[S3ObjectKey],
+) -> bool:
+    parents = [Path(x).parent for x in object_keys]
+    return len(set(parents)) <= 1
+
+
+UserSelectionStr: TypeAlias = str
+
+
+def _base_path_parent(base_path: UserSelectionStr, s3_object: S3ObjectKey) -> str:
+    base_path_parent_path = Path(base_path).parent
+    s3_object_path = Path(s3_object)
+    if base_path_parent_path == s3_object_path:
+        return s3_object_path.name
+
+    result = s3_object_path.relative_to(base_path_parent_path)
+    return f"{result}"
+
+
 async def create_and_upload_export(
     s3_client: SimcoreS3API,
     bucket: S3BucketName,
     *,
-    source_object_keys: set[StorageFileID],
+    source_object_keys: set[tuple[UserSelectionStr, StorageFileID]],
     destination_object_keys: StorageFileID,
     progress_bar: ProgressBarData,
 ) -> None:
     archive_entries: ArchiveEntries = [
         (
-            s3_object,
+            _base_path_parent(selection, s3_object),
             await s3_client.get_bytes_streamer_from_object(bucket, s3_object),
         )
-        for s3_object in source_object_keys
+        for (selection, s3_object) in source_object_keys
     ]
 
     async with progress_bar:
diff --git a/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py b/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py
index 3cd57155e29f..d7ed04de9b93 100644
--- a/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py
+++ b/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py
@@ -592,17 +592,20 @@ async def test_start_export_data(
 ):
     _, src_projects_list = await random_project_with_files(project_params)
 
-    paths_to_export: set[SimcoreS3FileID] = set()
+    all_available_files: set[SimcoreS3FileID] = set()
     for x in src_projects_list.values():
-        paths_to_export |= x.keys()
+        all_available_files |= x.keys()
+
+    nodes_in_project_to_export = {
+        TypeAdapter(PathToExport).validate_python("/".join(Path(x).parts[0:2]))
+        for x in all_available_files
+    }
 
     result = await _request_start_export_data(
         storage_rabbitmq_rpc_client,
         user_id,
         product_name,
-        paths_to_export=[
-            TypeAdapter(PathToExport).validate_python(x) for x in paths_to_export
-        ],
+        paths_to_export=list(nodes_in_project_to_export),
     )
 
     assert re.fullmatch(
diff --git a/services/storage/tests/unit/test_simcore_s3_dsm.py b/services/storage/tests/unit/test_simcore_s3_dsm.py
index df4d00e9db24..fdde44a86636 100644
--- a/services/storage/tests/unit/test_simcore_s3_dsm.py
+++ b/services/storage/tests/unit/test_simcore_s3_dsm.py
@@ -243,7 +243,11 @@ async def test_create_s3_export(
     cleanup_files_closure: Callable[[SimcoreS3FileID], None],
 ):
     initial_fmd_count = await _get_fmds_count(sqlalchemy_async_engine)
-    selection_to_export = _get_folder_and_files_selection(paths_for_export)
+    all_files_to_export = _get_folder_and_files_selection(paths_for_export)
+    selection_to_export = {
+        S3ObjectKey(project_id)
+        for project_id in {Path(p).parents[-2] for p in all_files_to_export}
+    }
 
     reports: list[ProgressReport] = []
 
diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py
index 74c79a8cf364..56f7d9bd92af 100644
--- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py
+++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py
@@ -1,5 +1,13 @@
+from pathlib import Path
+
 import pytest
-from simcore_service_storage.utils.simcore_s3_dsm_utils import compute_file_id_prefix
+from aws_library.s3._models import S3ObjectKey
+from simcore_service_storage.utils.simcore_s3_dsm_utils import (
+    UserSelectionStr,
+    _base_path_parent,
+    compute_file_id_prefix,
+    ensure_user_selection_from_same_base_directory,
+)
 
 
 @pytest.mark.parametrize(
@@ -19,3 +27,49 @@
 )
 def test_compute_file_id_prefix(file_id, levels, expected):
     assert compute_file_id_prefix(file_id, levels) == expected
+
+
+_FOLDERS_PATH = Path("nested/folders/path")
+
+
+@pytest.mark.parametrize(
+    "selection, s3_object, expected",
+    [
+        ("single_file", "single_file", "single_file"),
+        ("single_folder", "single_folder", "single_folder"),
+        ("a/b/c", "a/b/c/d/e/f/g", "c/d/e/f/g"),
+        (_FOLDERS_PATH / "folder", _FOLDERS_PATH / "folder", "folder"),
+        (_FOLDERS_PATH / "a_file.txt", _FOLDERS_PATH / "a_file.txt", "a_file.txt"),
+        (_FOLDERS_PATH, _FOLDERS_PATH / "with/some/content", "path/with/some/content"),
+    ],
+)
+def test__base_path_parent(selection: Path | str, s3_object: Path, expected: str):
+    assert (
+        _base_path_parent(UserSelectionStr(f"{selection}"), S3ObjectKey(f"{s3_object}"))
+        == expected
+    )
+
+
+@pytest.mark.parametrize(
+    "user_selection, expected",
+    [
+        ([], True),
+        (["folder"], True),
+        (["folder", "folder"], True),
+        (["", ""], True),
+        ([""], True),
+        ([_FOLDERS_PATH / "a", _FOLDERS_PATH / "b"], True),
+        (["a.txt", "b.txt"], True),
+        (["a/a.txt"], True),
+        # not same parent
+        (["firsta/file", "second/file"], False),
+        (["a/a.txt", "a.txt", "c.txt", "a/d.txt"], False),
+    ],
)
+def test_ensure_user_selection_from_same_base_directory(
+    user_selection: list[S3ObjectKey | Path], expected: bool
+):
+    assert (
+        ensure_user_selection_from_same_base_directory([f"{x}" for x in user_selection])
+        == expected
+    )
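
For reference, a minimal standalone sketch (pathlib only) of the two helpers this patch adds to simcore_s3_dsm_utils.py, showing how the same-parent guard and the archive entry naming interact. The object keys used at the bottom are hypothetical examples, not values from the patch:

from pathlib import Path


def ensure_user_selection_from_same_base_directory(object_keys: list[str]) -> bool:
    # all selected items must share the same immediate parent folder
    parents = [Path(x).parent for x in object_keys]
    return len(set(parents)) <= 1


def _base_path_parent(base_path: str, s3_object: str) -> str:
    # strip everything above the selected item so that the selection's
    # own name becomes the root entry inside the exported archive
    base_path_parent_path = Path(base_path).parent
    s3_object_path = Path(s3_object)
    if base_path_parent_path == s3_object_path:
        return s3_object_path.name
    return f"{s3_object_path.relative_to(base_path_parent_path)}"


# hypothetical object keys: two nodes selected inside the same project
selection = ["project-1/node-a", "project-1/node-b"]
assert ensure_user_selection_from_same_base_directory(selection)

# a file under node-a keeps node-a as its archive root
assert (
    _base_path_parent("project-1/node-a", "project-1/node-a/out/data.bin")
    == "node-a/out/data.bin"
)

# a selection spanning two parents fails the guard; create_s3_export
# turns this into a SelectionNotAllowedError
assert not ensure_user_selection_from_same_base_directory(
    ["project-1/node-a", "project-2/node-b"]
)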