Skip to content

Commit 6c2eee0

Browse files
authored
Add multiple include paths option to packagers (#117)
* Multiple include paths
  Signed-off-by: Igor Gitman <[email protected]>
* Same for pattern packager
  Signed-off-by: Igor Gitman <[email protected]>
* Apply formatting
  Signed-off-by: Igor Gitman <[email protected]>
* Add tests
  Signed-off-by: Igor Gitman <[email protected]>

---------

Signed-off-by: Igor Gitman <[email protected]>
1 parent fbeb070 commit 6c2eee0

File tree

4 files changed: 118 additions (+118) and 14 deletions (-14).

src/nemo_run/core/packaging/git.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,11 @@ class GitArchivePackager(Packager):
6262

6363
#: Include extra files in the archive which matches include_pattern
6464
#: This str will be included in the command as: find {include_pattern} -type f to get the list of extra files to include in the archive
65-
include_pattern: str = ""
65+
include_pattern: str | list[str] = ""
6666

6767
#: Relative path to use as tar -C option - need to be consistent with include_pattern
6868
#: If not provided, will use git base path.
69-
include_pattern_relative_path: str = ""
69+
include_pattern_relative_path: str | list[str] = ""
7070

7171
check_uncommitted_changes: bool = False
7272
check_untracked_files: bool = False
@@ -125,15 +125,32 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
125125
ctx.run(git_archive_cmd)
126126
if self.include_submodules:
127127
ctx.run(git_submodule_cmd)
128+
if isinstance(self.include_pattern, str):
129+
self.include_pattern = [self.include_pattern]
128130

129-
if self.include_pattern:
130-
include_pattern_relative_path = self.include_pattern_relative_path or shlex.quote(
131+
if isinstance(self.include_pattern_relative_path, str):
132+
self.include_pattern_relative_path = [self.include_pattern_relative_path]
133+
134+
if len(self.include_pattern) != len(self.include_pattern_relative_path):
135+
raise ValueError(
136+
"include_pattern and include_pattern_relative_path should have the same length"
137+
)
138+
139+
for include_pattern, include_pattern_relative_path in zip(
140+
self.include_pattern, self.include_pattern_relative_path
141+
):
142+
if include_pattern == "":
143+
continue
144+
include_pattern_relative_path = include_pattern_relative_path or shlex.quote(
131145
str(git_base_path)
132146
)
133147
relative_include_pattern = os.path.relpath(
134-
self.include_pattern, include_pattern_relative_path
148+
include_pattern, include_pattern_relative_path
149+
)
150+
include_pattern_cmd = (
151+
f"find {relative_include_pattern} -type f | "
152+
f"tar -cf {os.path.join(git_base_path, 'additional.tmp')} -T -"
135153
)
136-
include_pattern_cmd = f"find {relative_include_pattern} -type f | tar -cf {os.path.join(git_base_path, 'additional.tmp')} -T -"
137154
tar_concatenate_cmd = f"cat additional.tmp >> {output_file}.tmp && rm additional.tmp"
138155

139156
with ctx.cd(include_pattern_relative_path):

src/nemo_run/core/packaging/pattern.py

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -32,23 +32,47 @@ class PatternPackager(Packager):
3232
#: This str will be included in the command as:
3333
#: find {include_pattern} -type f to get the list of extra files to include in the archive
3434
#: best to use an absolute path here and a proper relative path argument to pass to tar
35-
include_pattern: str
35+
include_pattern: str | list[str]
3636

3737
#: Relative path to use as tar -C option.
38-
relative_path: str
38+
relative_path: str | list[str]
3939

4040
def package(self, path: Path, job_dir: str, name: str) -> str:
4141
output_file = os.path.join(job_dir, f"{name}.tar.gz")
4242
if os.path.exists(output_file):
4343
return output_file
4444

45-
relative_include_pattern = os.path.relpath(self.include_pattern, self.relative_path)
46-
cmd = (
47-
f"tar -czf {output_file} -C {self.relative_path} -T "
48-
f"<(cd {self.relative_path} && find {relative_include_pattern} -type f)"
49-
)
45+
if isinstance(self.include_pattern, str):
46+
self.include_pattern = [self.include_pattern]
47+
48+
if isinstance(self.relative_path, str):
49+
self.relative_path = [self.relative_path]
50+
51+
if len(self.include_pattern) != len(self.relative_path):
52+
raise ValueError("include_pattern and relative_path should have the same length")
53+
54+
# Create initial empty tar file
5055
ctx = Context()
51-
ctx.run(cmd)
56+
ctx.run(f"tar -cf {output_file}.tmp --files-from /dev/null")
57+
58+
for include_pattern, relative_path in zip(self.include_pattern, self.relative_path):
59+
if include_pattern == "":
60+
continue
61+
62+
relative_include_pattern = os.path.relpath(include_pattern, relative_path)
63+
64+
with ctx.cd(relative_path):
65+
# Append files directly to the main tar archive
66+
cmd = f"find {relative_include_pattern} -type f -print0 | xargs -0 tar -rf {output_file}.tmp"
67+
ctx.run(cmd)
68+
69+
# Gzip the final result
70+
gzip_cmd = f"gzip -c {output_file}.tmp > {output_file}"
71+
rm_cmd = f"rm {output_file}.tmp"
72+
73+
ctx.run(gzip_cmd)
74+
ctx.run(rm_cmd)
75+
5276
return output_file
5377

5478

test/core/packaging/test_git.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -287,6 +287,39 @@ def test_package_with_include_pattern_rel_path(packager, temp_repo, tmpdir):
287287
assert not cmp.diff_files
288288

289289

290+
@patch("nemo_run.core.packaging.git.Context", MockContext)
291+
def test_package_with_multi_include_pattern_rel_path(packager, temp_repo, tmpdir):
292+
temp_repo = Path(temp_repo)
293+
# Create extra files in a separate directory
294+
(tmpdir / "extra").mkdir()
295+
with open(tmpdir / "extra" / "extra_file1.txt", "w") as f:
296+
f.write("Extra file 1")
297+
with open(tmpdir / "extra" / "extra_file2.txt", "w") as f:
298+
f.write("Extra file 2")
299+
300+
include_pattern = [str(tmpdir / "extra/extra_file1.txt"), str(tmpdir / "extra/extra_file2.txt")]
301+
relative_path = [str(tmpdir), str(tmpdir)]
302+
303+
packager = GitArchivePackager(
304+
include_pattern=include_pattern, include_pattern_relative_path=relative_path
305+
)
306+
with tempfile.TemporaryDirectory() as job_dir:
307+
output_file = packager.package(Path(temp_repo), job_dir, "test_package")
308+
assert os.path.exists(output_file)
309+
subprocess.check_call(shlex.split(f"mkdir -p {os.path.join(job_dir, 'extracted_output')}"))
310+
subprocess.check_call(
311+
shlex.split(
312+
f"tar -xvzf {output_file} -C {os.path.join(job_dir, 'extracted_output')} --ignore-zeros"
313+
),
314+
)
315+
cmp = filecmp.dircmp(
316+
os.path.join(tmpdir, "extra"),
317+
os.path.join(job_dir, "extracted_output", "extra"),
318+
)
319+
assert cmp.left_list == cmp.right_list
320+
assert not cmp.diff_files
321+
322+
290323
@patch("nemo_run.core.packaging.git.Context", MockContext)
291324
def test_package_with_check_uncommitted_changes(packager, temp_repo):
292325
temp_repo = Path(temp_repo)

test/core/packaging/test_pattern.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,3 +50,33 @@ def test_package_with_include_pattern_rel_path(tmpdir):
5050
)
5151
assert cmp.left_list == cmp.right_list
5252
assert not cmp.diff_files
53+
54+
55+
@patch("nemo_run.core.packaging.pattern.Context", MockContext)
56+
def test_package_with_multi_include_pattern_rel_path(tmpdir):
57+
# Create extra files in a separate directory
58+
(tmpdir / "extra").mkdir()
59+
with open(tmpdir / "extra" / "extra_file1.txt", "w") as f:
60+
f.write("Extra file 1")
61+
with open(tmpdir / "extra" / "extra_file2.txt", "w") as f:
62+
f.write("Extra file 2")
63+
64+
include_pattern = [str(tmpdir / "extra/extra_file1.txt"), str(tmpdir / "extra/extra_file2.txt")]
65+
relative_path = [str(tmpdir), str(tmpdir)]
66+
67+
packager = PatternPackager(include_pattern=include_pattern, relative_path=relative_path)
68+
with tempfile.TemporaryDirectory() as job_dir:
69+
output_file = packager.package(Path(tmpdir), job_dir, "test_package")
70+
assert os.path.exists(output_file)
71+
subprocess.check_call(shlex.split(f"mkdir -p {os.path.join(job_dir, 'extracted_output')}"))
72+
subprocess.check_call(
73+
shlex.split(
74+
f"tar -xvzf {output_file} -C {os.path.join(job_dir, 'extracted_output')} --ignore-zeros"
75+
),
76+
)
77+
cmp = filecmp.dircmp(
78+
os.path.join(tmpdir, "extra"),
79+
os.path.join(job_dir, "extracted_output", "extra"),
80+
)
81+
assert cmp.left_list == cmp.right_list
82+
assert not cmp.diff_files

0 commit comments

Comments
 (0)