44import logging
55import types
66import zipfile
7- from contextlib import AsyncExitStack , contextmanager
7+ from collections .abc import Awaitable , Callable , Iterator
8+ from contextlib import AsyncExitStack , contextmanager , suppress
89from functools import partial
910from pathlib import Path
10- from typing import Any , Awaitable , Callable , Final , Iterator
11+ from typing import Any , Final
1112
1213import tqdm
1314from models_library .basic_types import IDStr
15+ from repro_zipfile import ReproducibleZipFile
1416from tqdm .contrib .logging import logging_redirect_tqdm , tqdm_logging_redirect
1517
1618from .file_utils import remove_directory
# NOTE(review): presumably caps the number of parallel extraction workers;
# the usage site is not visible here -- confirm against unarchive_dir
_MAX_UNARCHIVING_WORKER_COUNT: Final[int] = 2
# number of bytes requested per read() call when streaming a file out of an archive
_CHUNK_SIZE: Final[int] = 8 * 1024

_logger = logging.getLogger(__name__)
2628
2729
2830class ArchiveError (Exception ):
@@ -56,19 +58,21 @@ def _iter_files_to_compress(
5658 dir_path : Path , exclude_patterns : set [str ] | None
5759) -> Iterator [Path ]:
5860 exclude_patterns = exclude_patterns if exclude_patterns else set ()
59- for path in dir_path .rglob ("*" ):
61+ # NOTE: make sure to sort paths othrwise between different runs
62+ # the zip will have a different structure and hash
63+ for path in sorted (dir_path .rglob ("*" )):
6064 if path .is_file () and not any (
6165 fnmatch .fnmatch (f"{ path } " , x ) for x in exclude_patterns
6266 ):
6367 yield path
6468
6569
def _strip_directory_from_path(input_path: Path, to_strip: Path) -> Path:
    """Return ``input_path`` without its leading ``to_strip`` directory.

    Only a leading ``"<to_strip>/"`` is removed. Later occurrences of the same
    directory sequence inside the path are left untouched, and a path that does
    not start with ``to_strip`` is returned unchanged.

    Arguments:
        input_path: path to shorten
        to_strip: directory prefix to drop from ``input_path``

    Returns:
        the remainder of ``input_path`` after the stripped prefix
    """
    # str.replace would also drop matches in the middle of the path
    # (e.g. "/a/b/a/b/c" minus "/a" must give "b/a/b/c", not "bb/c")
    prefix = f"{to_strip}/"
    return Path(f"{input_path}".removeprefix(prefix))
6973
7074
71- class _FastZipFileReader (zipfile . ZipFile ):
75+ class _FastZipFileReader (ReproducibleZipFile ):
7276 """
7377 Used to gain a speed boost of several orders of magnitude.
7478
@@ -129,7 +133,7 @@ def _zipfile_single_file_extract_worker(
129133 desc = desc ,
130134 ** (
131135 _TQDM_FILE_OPTIONS
132- | dict ( miniters = _compute_tqdm_miniters (file_in_archive .file_size ))
136+ | { " miniters" : _compute_tqdm_miniters (file_in_archive .file_size )}
133137 ),
134138 ) as pbar :
135139 while chunk := zip_fp .read (_CHUNK_SIZE ):
@@ -139,7 +143,7 @@ def _zipfile_single_file_extract_worker(
139143
140144
141145def _ensure_destination_subdirectories_exist (
142- zip_file_handler : zipfile . ZipFile , destination_folder : Path
146+ zip_file_handler : ReproducibleZipFile , destination_folder : Path
143147) -> None :
144148 # assemble full destination paths
145149 full_destination_paths = {
@@ -177,7 +181,7 @@ async def unarchive_dir(
177181 )
178182 async with AsyncExitStack () as zip_stack :
179183 zip_file_handler = zip_stack .enter_context (
180- zipfile . ZipFile ( # pylint: disable=consider-using-with
184+ ReproducibleZipFile ( # pylint: disable=consider-using-with
181185 archive_to_extract ,
182186 mode = "r" ,
183187 )
@@ -232,7 +236,7 @@ async def unarchive_dir(
232236 extracted_path = await future
233237 extracted_file_size = extracted_path .stat ().st_size
234238 if tqdm_progress .update (extracted_file_size ) and log_cb :
235- with log_catch (log , reraise = False ):
239+ with log_catch (_logger , reraise = False ):
236240 await log_cb (f"{ tqdm_progress } " )
237241 await sub_prog .update (extracted_file_size )
238242 extracted_paths .append (extracted_path )
@@ -266,8 +270,8 @@ async def unarchive_dir(
266270
267271@contextmanager
268272def _progress_enabled_zip_write_handler (
269- zip_file_handler : zipfile . ZipFile , progress_bar : tqdm .tqdm
270- ) -> Iterator [zipfile . ZipFile ]:
273+ zip_file_handler : ReproducibleZipFile , progress_bar : tqdm .tqdm
274+ ) -> Iterator [ReproducibleZipFile ]:
271275 """Overrides the default zip write function so that progress can be reported through the tqdm library"""
272276
273277 def _write_with_progress (
@@ -308,11 +312,10 @@ def _add_to_archive(
308312 desc = f"{ desc } \n " ,
309313 total = folder_size_bytes ,
310314 ** (
311- _TQDM_FILE_OPTIONS
312- | dict (miniters = _compute_tqdm_miniters (folder_size_bytes ))
315+ _TQDM_FILE_OPTIONS | {"miniters" : _compute_tqdm_miniters (folder_size_bytes )}
313316 ),
314317 ) as progress_bar , _progress_enabled_zip_write_handler (
315- zipfile . ZipFile (destination , "w" , compression = compression ), progress_bar
318+ ReproducibleZipFile (destination , "w" , compression = compression ), progress_bar
316319 ) as zip_file_handler :
317320 for file_to_add in _iter_files_to_compress (dir_to_compress , exclude_patterns ):
318321 progress_bar .set_description (f"{ desc } /{ file_to_add .name } \n " )
@@ -393,10 +396,11 @@ async def archive_dir(
393396 if destination .is_file ():
394397 destination .unlink (missing_ok = True )
395398
396- raise ArchiveError (
399+ msg = (
397400 f"Failed archiving { dir_to_compress } -> { destination } due to { type (err )} ."
398401 f"Details: { err } "
399- ) from err
402+ )
403+ raise ArchiveError (msg ) from err
400404
401405 except BaseException :
402406 if destination .is_file ():
@@ -453,11 +457,9 @@ def prune(self, exclude: set[Path]) -> None:
453457 if path .is_file ():
454458 path .unlink ()
455459 elif path .is_dir ():
456- try :
460+ # prevents deleting non-empty folders
461+ with suppress (OSError ):
457462 path .rmdir ()
458- except OSError :
459- # prevents deleting non-empty folders
460- pass
461463
462464 # second pass to delete empty folders
463465 # after deleting files, some folders might have been left empty
0 commit comments