Skip to content

Commit 11e0d2f

Browse files
authored
Add hybrid packager to allow combining multiple packagers (#137)
1 parent 9d800c1 commit 11e0d2f

File tree

4 files changed

+133
-4
lines changed

4 files changed

+133
-4
lines changed

src/nemo_run/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@
2727
from nemo_run.core.execution.local import LocalExecutor
2828
from nemo_run.core.execution.skypilot import SkypilotExecutor
2929
from nemo_run.core.execution.slurm import SlurmExecutor
30-
from nemo_run.core.packaging.base import Packager
31-
from nemo_run.core.packaging.git import GitArchivePackager
32-
from nemo_run.core.packaging.pattern import PatternPackager
30+
from nemo_run.core.packaging import GitArchivePackager, HybridPackager, Packager, PatternPackager
3331
from nemo_run.core.tunnel.client import LocalTunnel, SSHTunnel
3432
from nemo_run.devspace.base import DevSpace
3533
from nemo_run.help import help
@@ -54,6 +52,7 @@
5452
"ExecutorMacros",
5553
"Experiment",
5654
"FaultTolerance",
55+
"HybridPackager",
5756
"GitArchivePackager",
5857
"PatternPackager",
5958
"help",

src/nemo_run/core/packaging/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from nemo_run.core.packaging.base import Packager
1717
from nemo_run.core.packaging.git import GitArchivePackager
18+
from nemo_run.core.packaging.hybrid import HybridPackager
1819
from nemo_run.core.packaging.pattern import PatternPackager
1920

20-
__all__ = ["Packager", "GitArchivePackager", "PatternPackager"]
21+
__all__ = ["Packager", "GitArchivePackager", "PatternPackager", "HybridPackager"]
src/nemo_run/core/packaging/hybrid.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import os
2+
from dataclasses import dataclass, field
3+
from pathlib import Path
4+
from typing import Dict
5+
6+
from invoke.context import Context
7+
8+
from nemo_run.core.packaging.base import Packager
9+
10+
11+
@dataclass(kw_only=True)
class HybridPackager(Packager):
    """
    A packager that combines multiple other packagers into one final archive.

    Each sub-packager is mapped to a target directory name, which becomes the
    top-level folder under which that packager's content is placed.
    """

    # Maps the folder name used inside the final archive to the packager
    # whose output is placed under that folder.
    sub_packagers: Dict[str, Packager] = field(default_factory=dict)

    def package(self, path: Path, job_dir: str, name: str) -> str:
        """
        Build ``<job_dir>/<name>.tar.gz`` from the output of every sub-packager.

        Args:
            path: Source path, forwarded unchanged to each sub-packager.
            job_dir: Directory that receives the final archive and all
                intermediate files.
            name: Base name of the final archive; each sub-packager is asked
                to produce ``<name>_<folder_name>``.

        Returns:
            The path of the combined ``.tar.gz`` archive. If that file already
            exists it is returned as-is and nothing is re-packaged.

        Note:
            The archive returned by each sub-packager is deleted once its
            content has been merged into the combined archive.
        """
        # Function-scope import so this block does not depend on an edit to
        # the module's import section.
        import shlex

        final_tar_gz = os.path.join(job_dir, f"{name}.tar.gz")
        if os.path.exists(final_tar_gz):
            return final_tar_gz

        # Every path below ends up inside a shell command string, so it is
        # quoted to survive whitespace and shell metacharacters.
        quoted_final = shlex.quote(final_tar_gz)

        # Create an empty tar to append packaged files from each sub-packager
        tmp_tar = final_tar_gz + ".tmp"
        quoted_tmp_tar = shlex.quote(tmp_tar)
        ctx = Context()
        ctx.run(f"tar -cf {quoted_tmp_tar} --files-from /dev/null")

        # For each subpackager, run its .package() method and extract to a subfolder
        for folder_name, packager in self.sub_packagers.items():
            subarchive_path = packager.package(path, job_dir, f"{name}_{folder_name}")
            quoted_subarchive = shlex.quote(str(subarchive_path))

            # Create a temp folder, extract subarchive content into it,
            # then add that folder to the final tar under the desired subpath
            tmp_extract_dir = os.path.join(job_dir, f"__extract_{folder_name}")
            os.makedirs(tmp_extract_dir, exist_ok=True)
            quoted_extract_dir = shlex.quote(tmp_extract_dir)

            # NOTE: --transform is a GNU tar option. folder_name is placed in
            # a sed-style replacement using ',' as delimiter and is not
            # escaped for it, so names containing ',', '&' or '\' are not
            # handled here.
            transform = shlex.quote(f"s,^,{folder_name}/,")

            ctx.run(f"tar -xf {quoted_subarchive} -C {quoted_extract_dir}")
            ctx.run(f"tar -rf {quoted_tmp_tar} -C {quoted_extract_dir} . --transform={transform}")
            ctx.run(f"rm -rf {quoted_extract_dir}")
            ctx.run(f"rm {quoted_subarchive}")

        # Finally, compress the combined tar
        ctx.run(f"gzip -c {quoted_tmp_tar} > {quoted_final}")
        ctx.run(f"rm {quoted_tmp_tar}")

        return final_tar_gz

test/core/packaging/test_hybrid.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import filecmp
2+
import os
3+
import subprocess
4+
import tempfile
5+
from pathlib import Path
6+
from unittest.mock import MagicMock, patch
7+
8+
import pytest
9+
10+
from nemo_run.core.packaging.base import Packager
11+
from nemo_run.core.packaging.hybrid import HybridPackager
12+
from test.conftest import MockContext
13+
14+
15+
@pytest.fixture
def mock_subpackager_one(tmp_path) -> Packager:
    """
    Provide a stand-in Packager whose ``package()`` returns a pre-built archive.

    The archive is written to ``tmp_path`` and is created from the single
    file file1.txt.
    """
    member = "file1.txt"
    (tmp_path / member).write_text("Content from packager one")

    # Build the gzip-compressed tarball the stub will hand back.
    archive = str(tmp_path / "packager_one.tar.gz")
    tar_cmd = ["tar", "-czf", archive, "-C", str(tmp_path), member]
    subprocess.run(tar_cmd, check=True)

    stub = MagicMock(spec=Packager)
    stub.package.return_value = archive
    return stub
31+
32+
33+
@pytest.fixture
def mock_subpackager_two(tmp_path) -> Packager:
    """
    Provide a stand-in Packager whose ``package()`` returns a pre-built archive.

    The archive is written to ``tmp_path`` and is created from the single
    file file2.txt.
    """
    member = "file2.txt"
    (tmp_path / member).write_text("Content from packager two")

    # Build the gzip-compressed tarball the stub will hand back.
    archive = str(tmp_path / "packager_two.tar.gz")
    tar_cmd = ["tar", "-czf", archive, "-C", str(tmp_path), member]
    subprocess.run(tar_cmd, check=True)

    stub = MagicMock(spec=Packager)
    stub.package.return_value = archive
    return stub
48+
49+
50+
@patch("nemo_run.core.packaging.hybrid.Context", MockContext)
def test_hybrid_packager(mock_subpackager_one, mock_subpackager_two, tmp_path):
    """
    Check that HybridPackager places each sub-packager's file under its folder.

    ``Context`` in the hybrid module is replaced with ``MockContext`` from the
    test conftest for the duration of the test.
    """
    hybrid = HybridPackager(
        sub_packagers={
            "1": mock_subpackager_one,
            "2": mock_subpackager_two,
        }
    )
    # Folder name in the combined archive -> (file expected there, its packager)
    expected = {
        "1": ("file1.txt", mock_subpackager_one),
        "2": ("file2.txt", mock_subpackager_two),
    }
    with tempfile.TemporaryDirectory() as job_dir:
        output_tar = hybrid.package(Path(tmp_path), job_dir, "hybrid_test")

        assert os.path.exists(output_tar)

        # Extract the resulting tar to verify contents
        extract_dir = os.path.join(job_dir, "hybrid_extracted")
        os.makedirs(extract_dir, exist_ok=True)
        subprocess.run(["tar", "-xzf", output_tar, "-C", extract_dir], check=True)

        for folder, (file_name, sub_packager) in expected.items():
            source_dir = os.path.dirname(sub_packager.package.return_value)
            extracted_dir = os.path.join(extract_dir, folder)
            cmp = filecmp.dircmp(source_dir, extracted_dir)

            # diff_files only covers names present on both sides, so on its
            # own it passes for an empty folder; require the file explicitly.
            assert file_name in cmp.common_files
            # Nothing may appear in the folder that the source dir lacks.
            assert not cmp.right_only
            assert not cmp.diff_files
            # Compare the actual bytes as well as dircmp's verdict.
            assert (
                Path(extracted_dir, file_name).read_bytes()
                == Path(source_dir, file_name).read_bytes()
            )

0 commit comments

Comments
 (0)