From 06f0caa6969b8ad7d304f38a541489ab5e603ae9 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 29 Oct 2025 16:41:47 -0400 Subject: [PATCH 1/2] fix(clp-package): Fix archive mounting dir for dataset delete. --- .../clp-package-utils/clp_package_utils/general.py | 7 +++++-- .../scripts/native/dataset_manager.py | 10 ++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/general.py b/components/clp-package-utils/clp_package_utils/general.py index 756578e834..7929793754 100644 --- a/components/clp-package-utils/clp_package_utils/general.py +++ b/components/clp-package-utils/clp_package_utils/general.py @@ -266,16 +266,19 @@ def generate_container_config( DockerMountType.BIND, clp_config.logs_directory, container_clp_config.logs_directory ) + transformed_container_archive_output_directory = ( + container_clp_config.model_copy(deep=True).archive_output.get_directory() + ) if not is_path_already_mounted( clp_home, CONTAINER_CLP_HOME, clp_config.archive_output.get_directory(), - container_clp_config.archive_output.get_directory(), + transformed_container_archive_output_directory, ): docker_mounts.archives_output_dir = DockerMount( DockerMountType.BIND, clp_config.archive_output.get_directory(), - container_clp_config.archive_output.get_directory(), + transformed_container_archive_output_directory, ) if not is_path_already_mounted( diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py b/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py index 4ff30cd9fd..ef4a11293a 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py @@ -113,7 +113,7 @@ def _try_deleting_archives( archive_storage_config = archive_output_config.storage storage_type = archive_storage_config.type if StorageType.FS == storage_type: - _try_deleting_archives_from_fs(archive_output_config, dataset_archive_storage_dir) + _try_deleting_archives_from_fs(dataset_archive_storage_dir) elif StorageType.S3 == storage_type: _try_deleting_archives_from_s3( archive_storage_config.s3_config, dataset_archive_storage_dir @@ -123,15 +123,9 @@ def _try_deleting_archives( def _try_deleting_archives_from_fs( - archive_output_config: ArchiveOutput, dataset_archive_storage_dir: str + dataset_archive_storage_dir: str ) -> None: - archives_dir = archive_output_config.get_directory() dataset_archive_storage_path = Path(dataset_archive_storage_dir).resolve() - if not dataset_archive_storage_path.is_relative_to(archives_dir): - raise ValueError( - f"'{dataset_archive_storage_path}' is not within top-level archive storage directory" - f" '{archives_dir}'" - ) if not dataset_archive_storage_path.exists(): logger.debug(f"'{dataset_archive_storage_path}' doesn't exist.") From d8c86b36917cbcd7cf9c3a8320afd716bd3fc283 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 29 Oct 2025 17:04:59 -0400 Subject: [PATCH 2/2] limit scope of monkey patching --- .../clp-package-utils/clp_package_utils/general.py | 7 ++----- .../clp_package_utils/scripts/dataset_manager.py | 11 ++++++++++- .../scripts/native/dataset_manager.py | 4 +--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/general.py b/components/clp-package-utils/clp_package_utils/general.py index 7929793754..756578e834 100644 --- a/components/clp-package-utils/clp_package_utils/general.py +++ b/components/clp-package-utils/clp_package_utils/general.py @@ -266,19 +266,16 @@ def generate_container_config( DockerMountType.BIND, clp_config.logs_directory, container_clp_config.logs_directory ) - transformed_container_archive_output_directory = ( - container_clp_config.model_copy(deep=True).archive_output.get_directory() - ) if not is_path_already_mounted( clp_home, CONTAINER_CLP_HOME, clp_config.archive_output.get_directory(), - transformed_container_archive_output_directory, + container_clp_config.archive_output.get_directory(), ): docker_mounts.archives_output_dir = DockerMount( DockerMountType.BIND, clp_config.archive_output.get_directory(), - transformed_container_archive_output_directory, + container_clp_config.archive_output.get_directory(), ) if not is_path_already_mounted( diff --git a/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py b/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py index f1c3111877..23d17b9f32 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py +++ b/components/clp-package-utils/clp_package_utils/scripts/dataset_manager.py @@ -18,6 +18,8 @@ from clp_py_utils.s3_utils import generate_container_auth_options from clp_package_utils.general import ( + DockerMount, + DockerMountType, dump_container_config, generate_container_config, generate_container_name, @@ -138,7 +140,14 @@ def main(argv: List[str]) -> int: necessary_mounts = [mounts.logs_dir] if clp_config.archive_output.storage.type == StorageType.FS: - necessary_mounts.append(mounts.archives_output_dir) + container_archive_output_config = container_clp_config.archive_output.model_copy(deep=True) + container_archive_output_config.storage.transform_for_container() + archives_output_dir_mount = DockerMount( + DockerMountType.BIND, + clp_config.archive_output.get_directory(), + container_archive_output_config.get_directory(), + ) + necessary_mounts.append(archives_output_dir_mount) aws_mount, aws_env_vars = generate_container_auth_options( clp_config, ARCHIVE_MANAGER_ACTION_NAME diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py b/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py index ef4a11293a..1eaaebf5de 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/dataset_manager.py @@ -122,9 +122,7 @@ def _try_deleting_archives( raise ValueError(f"Unsupported storage type: {storage_type}") -def _try_deleting_archives_from_fs( - dataset_archive_storage_dir: str -) -> None: +def _try_deleting_archives_from_fs(dataset_archive_storage_dir: str) -> None: dataset_archive_storage_path = Path(dataset_archive_storage_dir).resolve() if not dataset_archive_storage_path.exists():