Skip to content

Commit d1ae06f

Browse files
authored
Add extract_at_root option and MacOS support for HybridPackager (#149)
Signed-off-by: Hemil Desai <[email protected]>
1 parent f78e39e commit d1ae06f

File tree

3 files changed

+56
-6
lines changed

3 files changed

+56
-6
lines changed

src/nemo_run/core/packaging/git.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,7 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
154154
f"find {relative_include_pattern} -type f | "
155155
f"tar -cf {os.path.join(git_base_path, pattern_tar_file_name)} -T -"
156156
)
157-
tar_concatenate_cmd = (
158-
f"cat {pattern_tar_file_name} >> {output_file}.tmp && rm {pattern_tar_file_name}"
159-
)
157+
tar_concatenate_cmd = f"cat {os.path.join(git_base_path, pattern_tar_file_name)} >> {output_file}.tmp && rm {pattern_tar_file_name}"
160158

161159
with ctx.cd(include_pattern_relative_path):
162160
ctx.run(include_pattern_cmd)

src/nemo_run/core/packaging/hybrid.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,14 @@ class HybridPackager(Packager):
1313
"""
1414
A packager that combines multiple other packagers into one final archive.
1515
Each subpackager is mapped to a target directory name, which will become
16-
the top-level folder under which that packager’s content is placed.
16+
the top-level folder under which that packager's content is placed.
17+
18+
If `extract_at_root` is True, the contents of each sub-packager are extracted
19+
directly at the root of the final archive (i.e. without being nested in a subfolder).
1720
"""
1821

1922
sub_packagers: Dict[str, Packager] = field(default_factory=dict)
23+
extract_at_root: bool = False
2024

2125
def package(self, path: Path, job_dir: str, name: str) -> str:
2226
final_tar_gz = os.path.join(job_dir, f"{name}.tar.gz")
@@ -28,7 +32,8 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
2832
ctx = Context()
2933
ctx.run(f"tar -cf {tmp_tar} --files-from /dev/null")
3034

31-
# For each subpackager, run its .package() method and extract to a subfolder
35+
# For each subpackager, run its .package() method,
36+
# extract the content and add it to the final tar
3237
for folder_name, packager in self.sub_packagers.items():
3338
subarchive_path = packager.package(path, job_dir, f"{name}_{folder_name}")
3439

@@ -38,7 +43,22 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
3843
os.makedirs(tmp_extract_dir, exist_ok=True)
3944

4045
ctx.run(f"tar -xf {subarchive_path} -C {tmp_extract_dir}")
41-
ctx.run(f"tar -rf {tmp_tar} -C {tmp_extract_dir} . --transform='s,^,{folder_name}/,'")
46+
47+
# If extract_at_root is True then add files directly to the archive root.
48+
# Otherwise, add them under a subfolder named after the key.
49+
if self.extract_at_root:
50+
ctx.run(f"tar -rf {tmp_tar} -C {tmp_extract_dir} .")
51+
else:
52+
sysname = os.uname().sysname
53+
if sysname == "Darwin":
54+
# BSD tar uses the -s option with a chosen delimiter (here we use a comma)
55+
# The first -s replaces an entry that is exactly "."
56+
# The second -s replaces entries starting with "./" (i.e. files inside)
57+
transform_option = f"-s ',^\\.$,{folder_name},' -s ',^\\./,{folder_name}/,'"
58+
else:
59+
transform_option = f"--transform='s,^,{folder_name}/,'"
60+
ctx.run(f"tar {transform_option} -rf {tmp_tar} -C {tmp_extract_dir} .")
61+
4262
ctx.run(f"rm -rf {tmp_extract_dir}")
4363
ctx.run(f"rm {subarchive_path}")
4464

test/core/packaging/test_hybrid.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,35 @@ def test_hybrid_packager(mock_subpackager_one, mock_subpackager_two, tmp_path):
7878
os.path.join(extract_dir, "2"),
7979
)
8080
assert not cmp.diff_files
81+
82+
83+
@patch("nemo_run.core.packaging.hybrid.Context", MockContext)
def test_hybrid_packager_extract_at_root(mock_subpackager_one, mock_subpackager_two, tmp_path):
    """Check that ``extract_at_root=True`` puts sub-packager files at the archive root.

    Two sub-packagers are combined under the keys "1" and "2". After the
    resulting tarball is unpacked, ``file1.txt`` and ``file2.txt`` are expected
    directly inside the extraction directory, with the expected contents.
    """
    packagers_by_key = {"1": mock_subpackager_one, "2": mock_subpackager_two}
    packager = HybridPackager(sub_packagers=packagers_by_key, extract_at_root=True)

    # File name at the archive root -> text the test expects to read back.
    expected_contents = {
        "file1.txt": "Content from packager one",
        "file2.txt": "Content from packager two",
    }

    with tempfile.TemporaryDirectory() as job_dir:
        archive_path = packager.package(Path(tmp_path), job_dir, "hybrid_test_extract")
        assert os.path.exists(archive_path)

        # Unpack the produced archive into a scratch folder inside job_dir.
        unpack_dir = os.path.join(job_dir, "hybrid_extracted")
        os.makedirs(unpack_dir, exist_ok=True)
        subprocess.run(["tar", "-xzf", archive_path, "-C", unpack_dir], check=True)

        # Existence of every file is verified before any content is read,
        # so a missing file is always reported ahead of a content mismatch.
        extracted_paths = [os.path.join(unpack_dir, fname) for fname in expected_contents]
        for extracted in extracted_paths:
            assert os.path.exists(extracted), f"Expected {extracted} to exist, but it does not."

        actual_contents = []
        for extracted in extracted_paths:
            with open(extracted, "r") as handle:
                actual_contents.append(handle.read())

        comparisons = zip(extracted_paths, actual_contents, expected_contents.values())
        for extracted, actual, expected in comparisons:
            assert actual == expected, f"Unexpected content in {extracted}: {actual}"

0 commit comments

Comments
 (0)