 import uuid
 from dataclasses import dataclass
 from pathlib import Path
+import re
 
 from invoke.context import Context
 
@@ -72,6 +73,38 @@ class GitArchivePackager(Packager):
     check_uncommitted_changes: bool = False
     check_untracked_files: bool = False
 
+    def _concatenate_tar_files(
+        self, ctx: Context, output_file: str, files_to_concatenate: list[str]
+    ):
+        """Concatenate multiple uncompressed tar files into a single tar archive.
+
+        The list should include ALL fragments to merge (base + additions).
+        Creates/overwrites `output_file`.
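+
+        Example (hypothetical file names), assuming the fragment archives already exist:
+
+            self._concatenate_tar_files(ctx, "merged.tar", ["base.tar", "sub.tmp"])
+
+        After the call, `merged.tar` contains the members of both inputs and the
+        input fragments have been deleted.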
83+ """
84+ if not files_to_concatenate :
85+ raise ValueError ("files_to_concatenate must not be empty" )
86+
87+ # Quote paths for shell safety
88+ quoted_files = [shlex .quote (f ) for f in files_to_concatenate ]
89+ quoted_output_file = shlex .quote (output_file )
90+
91+ if os .uname ().sysname == "Linux" :
92+ # Start from the first archive then append the rest, to avoid self-append issues
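+            # GNU tar's `A` (--concatenate) mode appends the members of the listed
+            # archives to the end of the output archive; it only works on
+            # uncompressed tar files, which is why compression happens last.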
+            first_file, *rest_files = quoted_files
+            ctx.run(f"cp {first_file} {quoted_output_file}")
+            if rest_files:
+                ctx.run(f"tar Af {quoted_output_file} {' '.join(rest_files)}")
+            # Remove all input fragments
+            ctx.run(f"rm {' '.join(quoted_files)}")
+        else:
+            # Extract all fragments and repack once (faster than iterative extract/append)
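+            # This branch presumably covers platforms whose tar lacks `-A`
+            # (e.g. the BSD tar shipped with macOS): everything is extracted into
+            # one temporary directory and repacked as a single fresh archive.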
+            temp_dir = f"temp_extract_{uuid.uuid4()}"
+            ctx.run(f"mkdir -p {temp_dir}")
+            for file in quoted_files:
+                ctx.run(f"tar xf {file} -C {temp_dir}")
+            ctx.run(f"tar cf {quoted_output_file} -C {temp_dir} .")
+            ctx.run(f"rm -r {temp_dir} {' '.join(quoted_files)}")
+
     def package(self, path: Path, job_dir: str, name: str) -> str:
         output_file = os.path.join(job_dir, f"{name}.tar.gz")
         if os.path.exists(output_file):
@@ -113,20 +146,11 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
             )
 
         ctx = Context()
-        # we first add git files into an uncompressed archive
-        # then we add submodule files into that archive
-        # then we add an extra files from pattern to that archive
-        # finally we compress it (cannot compress right away, since adding files is not possible)
-        git_archive_cmd = (
-            f"git archive --format=tar --output={output_file}.tmp {self.ref}:{git_sub_path}"
-        )
-        if os.uname().sysname == "Linux":
-            tar_submodule_cmd = f"tar Af {output_file}.tmp $sha1.tmp && rm $sha1.tmp"
-        else:
-            tar_submodule_cmd = f"cat $sha1.tmp >> {output_file}.tmp && rm $sha1.tmp"
-
-        git_submodule_cmd = f"""git submodule foreach --recursive \
-            'git archive --format=tar --prefix=$sm_path/ --output=$sha1.tmp HEAD && {tar_submodule_cmd}'"""
+        # Build the base uncompressed archive, then separately generate all additional fragments.
+        # Finally, concatenate all fragments in one pass for performance and portability.
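+        # Fragments: the base `git archive` of the tracked tree, one archive per
+        # submodule (when include_submodules is enabled), and one archive per include pattern.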
+        base_tar_tmp = f"{output_file}.tmp.base"
+        git_archive_cmd = f"git archive --format=tar --output={shlex.quote(base_tar_tmp)} {self.ref}:{git_sub_path}"
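+        # `git submodule foreach` exports $sm_path and $sha1 for each submodule and runs the
+        # quoted command inside that submodule's directory, so each <sha1>.tmp archive is
+        # written into its submodule's working tree.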
+        git_submodule_cmd = "git submodule foreach --recursive 'git archive --format=tar --prefix=$sm_path/ --output=$sha1.tmp HEAD'"
 
         with ctx.cd(git_base_path):
             ctx.run(git_archive_cmd)
@@ -143,6 +167,16 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
143167 "include_pattern and include_pattern_relative_path should have the same length"
144168 )
145169
170+ # Collect submodule tar fragments (named as <40-hex-sha1>.tmp) if any
171+ submodule_tmp_files : list [str ] = []
172+ if self .include_submodules :
173+ for dirpath , _dirnames , filenames in os .walk (git_base_path ):
174+ for filename in filenames :
175+ if re .fullmatch (r"[0-9a-f]{40}\.tmp" , filename ):
176+ submodule_tmp_files .append (os .path .join (dirpath , filename ))
177+
178+ # Generate additional fragments from include patterns and collect their paths
179+ additional_tmp_files : list [str ] = []
146180 for include_pattern , include_pattern_relative_path in zip (
147181 self .include_pattern , self .include_pattern_relative_path
148182 ):
@@ -158,26 +192,16 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
                 include_pattern, include_pattern_relative_path
             )
             pattern_tar_file_name = os.path.join(git_base_path, pattern_tar_file_name)
-            include_pattern_cmd = (
-                f"find {relative_include_pattern} -type f | tar -cf {pattern_tar_file_name} -T -"
-            )
+            include_pattern_cmd = f"find {relative_include_pattern} -type f | tar -cf {shlex.quote(pattern_tar_file_name)} -T -"
 
             with ctx.cd(include_pattern_relative_path):
                 ctx.run(include_pattern_cmd)
+                additional_tmp_files.append(pattern_tar_file_name)
 
-            with ctx.cd(git_base_path):
-                if os.uname().sysname == "Linux":
-                    # On Linux, directly concatenate tar files
-                    ctx.run(f"tar Af {output_file}.tmp {pattern_tar_file_name}")
-                    ctx.run(f"rm {pattern_tar_file_name}")
-                else:
-                    # Extract and repack approach for other platforms
-                    temp_dir = f"temp_extract_{pattern_file_id}"
-                    ctx.run(f"mkdir -p {temp_dir}")
-                    ctx.run(f"tar xf {output_file}.tmp -C {temp_dir}")
-                    ctx.run(f"tar xf {pattern_tar_file_name} -C {temp_dir}")
-                    ctx.run(f"tar cf {output_file}.tmp -C {temp_dir} .")
-                    ctx.run(f"rm -rf {temp_dir} {pattern_tar_file_name}")
+        # Concatenate all fragments in one pass into {output_file}.tmp
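+        # The helper also deletes every fragment it merges, so no separate cleanup of the
+        # intermediate .tmp files is needed here.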
+        fragments_to_merge: list[str] = [base_tar_tmp] + submodule_tmp_files + additional_tmp_files
+        with ctx.cd(git_base_path):
+            self._concatenate_tar_files(ctx, f"{output_file}.tmp", fragments_to_merge)
 
         gzip_cmd = f"gzip -c {output_file}.tmp > {output_file}"
         rm_cmd = f"rm {output_file}.tmp"