@@ -1404,7 +1404,7 @@ def get_local_dirs_purged():
14041404 return new_dir
14051405
14061406
1407- def find_extension (filename , required = True ):
1407+ def find_extension (filename ):
14081408 """Find best match for filename extension."""
14091409 # sort by length, so longest file extensions get preference
14101410 suffixes = sorted (EXTRACT_CMDS .keys (), key = len , reverse = True )
@@ -1413,12 +1413,9 @@ def find_extension(filename, required=True):
14131413
14141414 if res :
14151415 return res .group ('ext' )
1416-
1417- if required :
1416+ else :
14181417 raise EasyBuildError ("%s has unknown file extension" , filename )
14191418
1420- return None
1421-
14221419
14231420def extract_cmd (filepath , overwrite = False ):
14241421 """
@@ -2648,7 +2645,7 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
26482645 """
26492646 Downloads a git repository, at a specific tag or commit, recursively or not, and make an archive with it
26502647
2651- :param filename: name of the archive to save the code to (must be extensionless )
2648+ :param filename: name of the archive file to save the code to (including extension )
26522649 :param target_dir: target directory where to save the archive to
26532650 :param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit
26542651 """
@@ -2684,11 +2681,6 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
26842681 if not url :
26852682 raise EasyBuildError ("url not specified in git_config parameter" )
26862683
2687- file_ext = find_extension (filename , required = False )
2688- if file_ext :
2689- print_warning (f"Ignoring extension of filename '{ filename } ' set in git_config parameter" )
2690- filename = filename [:- len (file_ext )]
2691-
26922684 # prepare target directory and clone repository
26932685 mkdir (target_dir , parents = True )
26942686
@@ -2776,27 +2768,27 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
27762768 # Create archive
27772769 repo_path = os .path .join (tmpdir , repo_name )
27782770 reproducible = not keep_git_dir # presence of .git directory renders repo unreproducible
2779- archive_path = make_archive (repo_path , archive_name = filename , archive_dir = target_dir , reproducible = reproducible )
2771+ archive_path = make_archive (repo_path , archive_file = filename , archive_dir = target_dir , reproducible = reproducible )
27802772
27812773 # cleanup (repo_name dir does not exist in dry run mode)
27822774 remove (tmpdir )
27832775
27842776 return archive_path
27852777
27862778
2787- def make_archive (dir_path , archive_name = None , archive_dir = None , reproducible = False ):
2779+ def make_archive (source_dir , archive_file = None , archive_dir = None , reproducible = True ):
27882780 """
2789- Create a compressed tar archive in XZ format.
2781+ Create an archive file of the given directory
27902782
2791- :dir_path : string with path to directory to be archived
2792- :archive_name : string with extensionless filename of archive
2783+ :source_dir : string with path to directory to be archived
2784+ :archive_file : string with filename of archive
27932785 :archive_dir: string with path to directory to place the archive
2794- :reproducuble: make a tarball that is reproducible accross systems
2795- see https://reproducible-builds.org/docs/archives/
2786+ :reproducible: make a tarball that is reproducible across systems
2787+ - see https://reproducible-builds.org/docs/archives/
2788+ - requires an uncompressed or LZMA-compressed archive; other formats like .gz are not reproducible
2789+ due to arbitrary strings and timestamps added to their metadata.
27962790
2797- Archive is compressed with LZMA into a .xz because that is compatible with
2798- a reproducible archive. Other formats like .gz are not reproducible due to
2799- arbitrary strings and timestamps getting added into their metadata.
2791+ Default behaviour: reproducible tarball in .tar.xz
28002792 """
28012793 def reproducible_filter (tarinfo ):
28022794 "Filter out system-dependent data from tarball"
@@ -2815,37 +2807,87 @@ def reproducible_filter(tarinfo):
28152807 tarinfo .uname = tarinfo .gname = ""
28162808 return tarinfo
28172809
2818- if archive_name is None :
2819- archive_name = os .path .basename (dir_path )
2810+ compression = {
2811+ # taken from EXTRACT_CMDS
2812+ '.gtgz' : 'gz' ,
2813+ '.tar.gz' : 'gz' ,
2814+ '.tgz' : 'gz' ,
2815+ '.tar.bz2' : 'bz2' ,
2816+ '.tb2' : 'bz2' ,
2817+ '.tbz' : 'bz2' ,
2818+ '.tbz2' : 'bz2' ,
2819+ '.tar.xz' : 'xz' ,
2820+ '.txz' : 'xz' ,
2821+ '.tar' : '' ,
2822+ }
2823+ reproducible_compression = ["" , "xz" ]
2824+ default_ext = ".tar.xz"
28202825
2821- archive_ext = ".tar.xz"
2822- archive_filename = archive_name + archive_ext
2823- archive_path = archive_filename if archive_dir is None else os .path .join (archive_dir , archive_filename )
2826+ if archive_file is None :
2827+ archive_file = os .path .basename (source_dir ) + default_ext
2828+
2829+ try :
2830+ archive_ext = find_extension (archive_file )
2831+ except EasyBuildError :
2832+ if "." in archive_file :
2833+ # archive filename has an unknown extension: leave it empty so the format check below raises
2834+ archive_ext = ""
2835+ else :
2836+ # archive filename has no extension, use default one
2837+ archive_ext = default_ext
2838+ archive_file += archive_ext
2839+
2840+ if archive_ext not in compression :
2841+ # archive filename has unsupported extension
2842+ raise EasyBuildError (
2843+ f"Unsupported archive format: { archive_file } . Supported tarball extensions: { ', ' .join (compression )} "
2844+ )
2845+ _log .debug (f"Archive extension and compression: { archive_ext } in { compression [archive_ext ]} " )
2846+
2847+ archive_path = archive_file if archive_dir is None else os .path .join (archive_dir , archive_file )
2848+
2849+ archive = {
2850+ 'name' : archive_path ,
2851+ 'mode' : f"w:{ compression [archive_ext ]} " ,
2852+ 'format' : tarfile .GNU_FORMAT ,
2853+ 'encoding' : "utf-8" ,
2854+ }
2855+
2856+ if reproducible :
2857+ if compression [archive_ext ] == "xz" :
2858+ # ensure a consistent compression level in reproducible tarballs with XZ
2859+ archive ["preset" ] = 6
2860+ elif compression [archive_ext ] not in reproducible_compression :
2861+ # requested archive compression cannot be made reproducible
2862+ print_warning (
2863+ f"Requested reproducible archive with unsupported file compression ({ compression [archive_ext ]} ). "
2864+ "Please use XZ instead."
2865+ )
2866+ reproducible = False
2867+
2868+ archive_filter = reproducible_filter if reproducible else None
28242869
28252870 if build_option ('extended_dry_run' ):
28262871 # early return in dry run mode
2827- dry_run_msg ("Archiving '%s' into '%s'..." , dir_path , archive_path )
2872+ dry_run_msg ("Archiving '%s' into '%s'..." , source_dir , archive_path )
28282873 return archive_path
2874+ _log .info ("Archiving '%s' into '%s'..." , source_dir , archive_path )
28292875
28302876 # TODO: replace with TarFile.add(recursive=True) once support for Python 3.6 is dropped
28312877 # since Python v3.7 tarfile automatically orders the list of files added to the archive
2832- dir_files = [dir_path ]
2878+ source_files = [source_dir ]
28332879 # pathlib's glob includes hidden files
2834- dir_files .extend ([str (filepath ) for filepath in pathlib .Path (dir_path ).glob ("**/*" )])
2835- dir_files .sort () # independent of locale
2836-
2837- dir_path_prefix = os .path .dirname (dir_path )
2838- archive_filter = reproducible_filter if reproducible else None
2880+ source_files .extend ([str (filepath ) for filepath in pathlib .Path (source_dir ).glob ("**/*" )])
2881+ source_files .sort () # independent of locale
28392882
2840- _log .info ("Archiving '%s' into '%s'..." , dir_path , archive_path )
2841- with tarfile .open (archive_path , "w:xz" , format = tarfile .GNU_FORMAT , encoding = "utf-8" , preset = 6 ) as archive :
2842- for filepath in dir_files :
2883+ with tarfile .open (** archive ) as tar_archive :
2884+ for filepath in source_files :
28432885 # archive with target directory in its top level, remove any prefix in path
2844- file_name = os .path .relpath (filepath , start = dir_path_prefix )
2845- archive .add (filepath , arcname = file_name , recursive = False , filter = archive_filter )
2846- _log .debug ("File/folder added to archive '%s': %s" , archive_filename , filepath )
2886+ file_name = os .path .relpath (filepath , start = os . path . dirname ( source_dir ) )
2887+ tar_archive .add (filepath , arcname = file_name , recursive = False , filter = archive_filter )
2888+ _log .debug ("File/folder added to archive '%s': %s" , archive_file , filepath )
28472889
2848- _log .info ("Archive '%s' created successfully" , archive_filename )
2890+ _log .info ("Archive '%s' created successfully" , archive_file )
28492891
28502892 return archive_path
28512893
0 commit comments