|
48 | 48 | import inspect |
49 | 49 | import itertools |
50 | 50 | import os |
| 51 | +import pathlib |
51 | 52 | import platform |
52 | 53 | import re |
53 | 54 | import shutil |
54 | 55 | import signal |
55 | 56 | import stat |
56 | 57 | import ssl |
57 | 58 | import sys |
| 59 | +import tarfile |
58 | 60 | import tempfile |
59 | 61 | import time |
60 | 62 | import zlib |
@@ -2644,7 +2646,7 @@ def get_source_tarball_from_git(filename, target_dir, git_config): |
2644 | 2646 | """ |
2645 | 2647 | Downloads a git repository, at a specific tag or commit, recursively or not, and make an archive with it |
2646 | 2648 |
|
2647 | | - :param filename: name of the archive to save the code to (must be .tar.gz) |
| 2649 | + :param filename: name of the archive to save the code to (without extension; any extension given is ignored with a warning) |
2648 | 2650 | :param target_dir: target directory where to save the archive to |
2649 | 2651 | :param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit |
2650 | 2652 | """ |
@@ -2680,8 +2682,10 @@ def get_source_tarball_from_git(filename, target_dir, git_config): |
2680 | 2682 | if not url: |
2681 | 2683 | raise EasyBuildError("url not specified in git_config parameter") |
2682 | 2684 |
|
2683 | | - if not filename.endswith('.tar.gz'): |
2684 | | - raise EasyBuildError("git_config currently only supports filename ending in .tar.gz") |
| 2685 | + file_ext = find_extension(filename) |
| 2686 | + if file_ext: |
| 2687 | + print_warning(f"Ignoring extension of filename '{filename}' set in git_config parameter") |
| 2688 | + filename = filename[:-len(file_ext)] |
2685 | 2689 |
|
2686 | 2690 | # prepare target directory and clone repository |
2687 | 2691 | mkdir(target_dir, parents=True) |
@@ -2768,50 +2772,72 @@ def get_source_tarball_from_git(filename, target_dir, git_config): |
2768 | 2772 | run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True) |
2769 | 2773 |
|
2770 | 2774 | # Create archive |
2771 | | - archive_path = os.path.join(target_dir, filename) |
2772 | | - |
2773 | | - if keep_git_dir: |
2774 | | - # create archive of git repo including .git directory |
2775 | | - tar_cmd = f"tar cfvz {archive_path} {repo_name}" |
2776 | | - else: |
2777 | | - # create reproducible archive |
2778 | | - tar_cmd = reproducible_archive_cmd(repo_name, archive_path) |
2779 | | - |
2780 | | - run_shell_cmd(tar_cmd, work_dir=tmpdir, hidden=True, verbose_dry_run=True) |
| 2775 | + repo_path = os.path.join(tmpdir, repo_name) |
| 2776 | + archive_path = make_archive(repo_path, archive_name=filename, archive_dir=target_dir, reproducible=not keep_git_dir) |
2781 | 2777 |
|
2782 | 2778 | # cleanup (repo_name dir does not exist in dry run mode) |
2783 | 2779 | remove(tmpdir) |
2784 | 2780 |
|
2785 | 2781 | return archive_path |
2786 | 2782 |
|
2787 | 2783 |
|
2788 | | -def reproducible_archive_cmd(dir_name, archive_name): |
| 2784 | +def make_archive(dir_path, archive_name=None, archive_dir=None, reproducible=False): |
2789 | 2785 | """ |
2790 | | - Return string with command to make reproducible archive from a given directory |
| 2786 | + Create a compressed tar archive in XZ format. |
| 2787 | +
|
| 2788 | + :param dir_path: string with path to directory to be archived |
| 2789 | + :param archive_name: string with extensionless filename of archive (defaults to the basename of dir_path) |
| 2790 | + :param archive_dir: string with path to directory to place the archive (if omitted, the archive path is just the filename) |
| 2791 | + :param reproducible: make a tarball that is reproducible across systems |
2791 | 2792 | see https://reproducible-builds.org/docs/archives/ |
2792 | | - """ |
2793 | | - try: |
2794 | | - cmd_pipe = [ |
2795 | | - # stop on failure of any command in the pipe |
2796 | | - 'set', '-eo pipefail', ';', |
2797 | | - # print names of all files and folders excluding .git directory |
2798 | | - 'find', str(dir_name), '-name ".git"', '-prune', '-o', '-print0', |
2799 | | - # reset access and modification timestamps to epoch 0 |
2800 | | - '-exec', 'touch', '--date=1970-01-01T00:00:00.00Z', '{}', r'\;', |
2801 | | - # reset file permissions of cloned repo (equivalent to --mode in GNU tar) |
2802 | | - '-exec', 'chmod', '"go+u,go-w"', '{}', r'\;', '|', |
2803 | | - # sort file list (equivalent to --sort in GNU tar) |
2804 | | - 'LC_ALL=C', 'sort', '--zero-terminated', '|', |
2805 | | - # create tarball in GNU format with ownership and permissions reset |
2806 | | - 'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner', |
2807 | | - '--format=gnu', '--null', '--files-from', '-', '|', |
2808 | | - # compress tarball with gzip without original file name and timestamp |
2809 | | - 'gzip', '--no-name', '>', str(archive_name) |
2810 | | - ] |
2811 | | - except TypeError as err: |
2812 | | - raise EasyBuildError("reproducible_archive_cmd: wrong directory or archive name given") from err |
2813 | | - |
2814 | | - return " ".join(cmd_pipe) |
| 2793 | +
|
| 2794 | + Archive is compressed with LZMA into a .xz because that is compatible with |
| 2795 | + a reproducible archive. Other formats like .gz are not reproducible due to |
| 2796 | + arbitrary strings and timestamps getting added into their metadata. |
| 2797 | + """ |
| 2798 | + def reproducible_filter(tarinfo): |
| 2799 | + "Filter out system-dependent data from tarball" |
| 2800 | + # contents of '.git' subdir are inherently system dependent |
| 2801 | + if "/.git/" in tarinfo.name or tarinfo.name.endswith("/.git"): |
| 2802 | + return None |
| 2803 | + # set timestamp to epoch 0 |
| 2804 | + tarinfo.mtime = 0 |
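| 2805 | + # reset file permissions by applying go+u,go-w (NOTE(review): the expression below only ORs bits in, so pre-existing group/other write bits are not cleared - confirm) |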
| 2806 | + user_mode = tarinfo.mode & stat.S_IRWXU |
| 2807 | + tarinfo.mode = tarinfo.mode | user_mode >> 3 & ~stat.S_IWGRP | user_mode >> 6 & ~stat.S_IWOTH |
| 2808 | + # reset ownership to numeric UID/GID 0 |
| 2809 | + tarinfo.uid = tarinfo.gid = 0 |
| 2810 | + tarinfo.uname = tarinfo.gname = "" |
| 2811 | + return tarinfo |
| 2812 | + |
| 2813 | + if archive_name is None: |
| 2814 | + archive_name = os.path.basename(dir_path) |
| 2815 | + |
| 2816 | + archive_ext = ".tar.xz" |
| 2817 | + archive_filename = archive_name + archive_ext |
| 2818 | + archive_path = archive_filename if archive_dir is None else os.path.join(archive_dir, archive_filename) |
| 2819 | + |
| 2820 | + # TODO: replace with TarFile.add(recursive=True) when support for Python 3.6 is dropped |
| 2821 | + # since Python v3.7 tarfile automatically orders the list of files added to the archive |
| 2822 | + dir_files = [dir_path] |
| 2823 | + # pathlib's glob includes hidden files |
| 2824 | + dir_files.extend([str(filepath) for filepath in pathlib.Path(dir_path).glob("**/*")]) |
| 2825 | + dir_files.sort() # independent of locale |
| 2826 | + |
| 2827 | + dir_path_prefix = os.path.dirname(dir_path) |
| 2828 | + archive_filter = reproducible_filter if reproducible else None |
| 2829 | + |
| 2830 | + _log.info("Archiving '%s' into '%s'...", dir_path, archive_path) |
| 2831 | + with tarfile.open(archive_path, "w:xz", format=tarfile.GNU_FORMAT, encoding="utf-8", preset=6) as archive: |
| 2832 | + for filepath in dir_files: |
| 2833 | + # archive with target directory in its top level, remove any prefix in path |
| 2834 | + file_name = os.path.relpath(filepath, start=dir_path_prefix) |
| 2835 | + archive.add(filepath, arcname=file_name, recursive=False, filter=archive_filter) |
| 2836 | + _log.debug("File/folder added to archive '%s': %s", archive_filename, filepath) |
| 2837 | + |
| 2838 | + _log.info("Archive '%s' created successfully", archive_filename) |
| 2839 | + |
| 2840 | + return archive_path |
2815 | 2841 |
|
2816 | 2842 |
|
2817 | 2843 | def move_file(path, target_path, force_in_dry_run=False): |
|
0 commit comments