Skip to content

Commit 81e6cac

Browse files
authored
Upgrade skypilot to v0.10.0, introduce network_tier (#297)
* Upgrade skypilot to v0.10.0, introduce network_tier Signed-off-by: Roee Landesman <[email protected]> * add unit tests Signed-off-by: Roee Landesman <[email protected]> --------- Signed-off-by: Roee Landesman <[email protected]>
1 parent 3b2b91a commit 81e6cac

File tree

4 files changed

+3288
-2724
lines changed

4 files changed

+3288
-2724
lines changed

nemo_run/core/execution/skypilot.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
try:
3636
import sky
3737
import sky.task as skyt
38-
from sky.utils import status_lib
3938
from sky import backends
39+
from sky.utils import status_lib
4040

4141
_SKYPILOT_AVAILABLE = True
4242
except ImportError:
@@ -62,7 +62,8 @@ class SkypilotExecutor(Executor):
6262
gpus="A10G",
6363
gpus_per_node=devices,
6464
container_image="nvcr.io/nvidia/nemo:dev",
65-
cloud="kubernetes",
65+
infra="k8s/my-context",
66+
network_tier="best",
6667
cluster_name="nemo_tester",
6768
file_mounts={
6869
"nemo_run.whl": "nemo_run.whl"
@@ -105,6 +106,8 @@ class SkypilotExecutor(Executor):
105106
idle_minutes_to_autostop: Optional[int] = None
106107
torchrun_nproc_per_node: Optional[int] = None
107108
cluster_config_overrides: Optional[dict[str, Any]] = None
109+
infra: Optional[str] = None
110+
network_tier: Optional[str] = None
108111
packager: Packager = field(default_factory=lambda: GitArchivePackager()) # type: ignore # noqa: F821
109112

110113
def __post_init__(self):
@@ -114,6 +117,13 @@ def __post_init__(self):
114117
assert isinstance(self.packager, GitArchivePackager), (
115118
"Only GitArchivePackager is currently supported for SkypilotExecutor."
116119
)
120+
if self.infra is not None:
121+
assert self.cloud is None, "Cannot specify both `infra` and `cloud` parameters."
122+
assert self.region is None, "Cannot specify both `infra` and `region` parameters."
123+
assert self.zone is None, "Cannot specify both `infra` and `zone` parameters."
124+
logger.info(
125+
"`cloud` is deprecated and will be removed in a future version. Use `infra` instead."
126+
)
117127

118128
@classmethod
119129
def parse_app(cls: Type["SkypilotExecutor"], app_id: str) -> tuple[str, str, int]:
@@ -173,6 +183,8 @@ def parse_attr(attr: str):
173183
"memory",
174184
"instance_type",
175185
"use_spot",
186+
"infra",
187+
"network_tier",
176188
"image_id",
177189
"disk_size",
178190
"disk_tier",

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ lepton = "nemo_run.run.torchx_backend.schedulers.lepton:create_scheduler"
5151

5252
[project.optional-dependencies]
5353
skypilot = [
54-
"skypilot[kubernetes]>=0.9.2",
54+
"skypilot[kubernetes]>=0.10.0",
5555
]
5656
skypilot-all = [
57-
"skypilot[all]>=0.9.2",
57+
"skypilot[all]>=0.10.0",
5858
]
5959
ray = [
6060
"kubernetes"

test/core/execution/test_skypilot.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class MockClusterNotUpError(Exception):
6868
"sky.core": sky_core_mock,
6969
"sky.skylet.job_lib": job_lib_mock,
7070
"sky.utils.common_utils": common_utils_mock,
71+
"sky.resources": MagicMock(),
7172
}
7273

7374
# Also mock the sky_exceptions module with our mock exception
@@ -141,6 +142,33 @@ def test_init_non_git_packager(self, mock_skypilot_imports):
141142
packager=non_git_packager,
142143
)
143144

145+
def test_init_with_infra_and_cloud_fails(self, mock_skypilot_imports):
146+
with pytest.raises(
147+
AssertionError, match="Cannot specify both `infra` and `cloud` parameters."
148+
):
149+
SkypilotExecutor(
150+
infra="my-infra",
151+
cloud="aws",
152+
)
153+
154+
def test_init_with_infra_and_region_fails(self, mock_skypilot_imports):
155+
with pytest.raises(
156+
AssertionError, match="Cannot specify both `infra` and `region` parameters."
157+
):
158+
SkypilotExecutor(
159+
infra="my-infra",
160+
region="us-west-2",
161+
)
162+
163+
def test_init_with_infra_and_zone_fails(self, mock_skypilot_imports):
164+
with pytest.raises(
165+
AssertionError, match="Cannot specify both `infra` and `zone` parameters."
166+
):
167+
SkypilotExecutor(
168+
infra="my-infra",
169+
zone="us-west-2a",
170+
)
171+
144172
def test_parse_app(self, mock_skypilot_imports):
145173
app_id = "app___cluster-name___task-name___123"
146174
cluster, task, job_id = SkypilotExecutor.parse_app(app_id)
@@ -228,6 +256,18 @@ def test_to_resources_with_none_string(self, mock_resources, mock_skypilot_impor
228256
assert config["cloud"] is None
229257
assert config["any_of"][1]["region"] is None
230258

259+
@patch("sky.resources.Resources")
260+
def test_to_resources_with_infra_and_network_tier(self, mock_resources, mock_skypilot_imports):
261+
executor = SkypilotExecutor(infra="k8s/my-context", network_tier="best")
262+
263+
executor.to_resources()
264+
265+
mock_resources.from_yaml_config.assert_called_once()
266+
267+
config = mock_resources.from_yaml_config.call_args[0][0]
268+
assert config["infra"] == "k8s/my-context"
269+
assert config["network_tier"] == "best"
270+
231271
@patch("sky.core.status")
232272
@patch("sky.core.queue")
233273
@patch("nemo_run.core.execution.skypilot.SkypilotExecutor.parse_app")

0 commit comments

Comments
 (0)