Skip to content

Commit bd9dfd9

Browse files
authored
Use a robust approach for concatenating tars in GitArchivePackager (#151)
1 parent 03db88e commit bd9dfd9

File tree

1 file changed

+17
-5
lines changed
  • src/nemo_run/core/packaging

1 file changed

+17
-5
lines changed

src/nemo_run/core/packaging/git.py

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -137,11 +137,12 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
137137
"include_pattern and include_pattern_relative_path should have the same length"
138138
)
139139

140-
pattern_file_id = uuid.uuid4()
141-
pattern_tar_file_name = f"additional_{pattern_file_id}.tmp"
142140
for include_pattern, include_pattern_relative_path in zip(
143141
self.include_pattern, self.include_pattern_relative_path
144142
):
143+
pattern_file_id = uuid.uuid4()
144+
pattern_tar_file_name = f"additional_{pattern_file_id}.tmp"
145+
145146
if include_pattern == "":
146147
continue
147148
include_pattern_relative_path = include_pattern_relative_path or shlex.quote(
@@ -150,17 +151,28 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
150151
relative_include_pattern = os.path.relpath(
151152
include_pattern, include_pattern_relative_path
152153
)
154+
pattern_tar_file_name = os.path.join(git_base_path, pattern_tar_file_name)
153155
include_pattern_cmd = (
154156
f"find {relative_include_pattern} -type f | "
155-
f"tar -cf {os.path.join(git_base_path, pattern_tar_file_name)} -T -"
157+
f"tar -cf {pattern_tar_file_name} -T -"
156158
)
157-
tar_concatenate_cmd = f"cat {os.path.join(git_base_path, pattern_tar_file_name)} >> {output_file}.tmp && rm {pattern_tar_file_name}"
158159

159160
with ctx.cd(include_pattern_relative_path):
160161
ctx.run(include_pattern_cmd)
161162

162163
with ctx.cd(git_base_path):
163-
ctx.run(tar_concatenate_cmd)
164+
if os.uname().sysname == "Linux":
165+
# On Linux, directly concatenate tar files
166+
ctx.run(f"tar Af {output_file}.tmp {pattern_tar_file_name}")
167+
ctx.run(f"rm {pattern_tar_file_name}")
168+
else:
169+
# Extract and repack approach for other platforms
170+
temp_dir = f"temp_extract_{pattern_file_id}"
171+
ctx.run(f"mkdir -p {temp_dir}")
172+
ctx.run(f"tar xf {output_file}.tmp -C {temp_dir}")
173+
ctx.run(f"tar xf {pattern_tar_file_name} -C {temp_dir}")
174+
ctx.run(f"tar cf {output_file}.tmp -C {temp_dir} .")
175+
ctx.run(f"rm -rf {temp_dir} {pattern_tar_file_name}")
164176

165177
gzip_cmd = f"gzip -c {output_file}.tmp > {output_file}"
166178
rm_cmd = f"rm {output_file}.tmp"

0 commit comments

Comments
 (0)