Skip to content

Commit dd0354d

Browse files
authored
Import guard k8s import in Ray Cluster and Job (#245)
1 parent 252edfb commit dd0354d

File tree

5 files changed

+47
-27
lines changed

5 files changed

+47
-27
lines changed

nemo_run/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
RUNDIR_SPECIAL_NAME = "/$nemo_run"
5050
SCRIPTS_DIR = "scripts"
5151

52+
# Metadata keys
53+
USE_WITH_RAY_CLUSTER_KEY = "use_with_ray_cluster"
54+
5255

5356
def get_nemorun_home() -> str:
5457
"""

nemo_run/run/ray/cluster.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,32 +14,41 @@
1414
# limitations under the License.
1515

1616
from dataclasses import dataclass
17-
from typing import Optional
17+
from typing import Optional, Type
1818

1919
from nemo_run.core.execution.base import Executor
20-
from nemo_run.core.execution.kuberay import KubeRayExecutor
2120
from nemo_run.core.execution.slurm import SlurmExecutor
22-
from nemo_run.run.ray.kuberay import KubeRayCluster
2321
from nemo_run.run.ray.slurm import SlurmRayCluster
2422

25-
USE_WITH_RAY_CLUSTER_KEY = "use_with_ray_cluster"
23+
# Import guard for Kubernetes dependencies
24+
try:
25+
from nemo_run.core.execution.kuberay import KubeRayExecutor
26+
from nemo_run.run.ray.kuberay import KubeRayCluster
27+
28+
_KUBERAY_AVAILABLE = True
29+
except ImportError:
30+
KubeRayExecutor = None
31+
KubeRayCluster = None
32+
_KUBERAY_AVAILABLE = False
2633

2734

2835
@dataclass(kw_only=True)
2936
class RayCluster:
30-
BACKEND_MAP = {
31-
KubeRayExecutor: KubeRayCluster,
32-
SlurmExecutor: SlurmRayCluster,
33-
}
34-
3537
name: str
3638
executor: Executor
3739

3840
def __post_init__(self):
39-
if self.executor.__class__ not in self.BACKEND_MAP:
41+
backend_map: dict[Type[Executor], Type] = {
42+
SlurmExecutor: SlurmRayCluster,
43+
}
44+
45+
if _KUBERAY_AVAILABLE and KubeRayExecutor is not None and KubeRayCluster is not None:
46+
backend_map[KubeRayExecutor] = KubeRayCluster
47+
48+
if self.executor.__class__ not in backend_map:
4049
raise ValueError(f"Unsupported executor: {self.executor.__class__}")
4150

42-
backend_cls = self.BACKEND_MAP[self.executor.__class__]
51+
backend_cls = backend_map[self.executor.__class__]
4352
self.backend = backend_cls(name=self.name, executor=self.executor) # type: ignore[arg-type]
4453

4554
self._port_forward_map = {}

nemo_run/run/ray/job.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,35 +14,45 @@
1414
# limitations under the License.
1515

1616
from dataclasses import dataclass
17-
from typing import Any, Optional
17+
from typing import Any, Optional, Type
1818

1919
from nemo_run.core.execution.base import Executor
20-
from nemo_run.core.execution.kuberay import KubeRayExecutor
2120
from nemo_run.core.execution.slurm import SlurmExecutor
22-
from nemo_run.run.ray.kuberay import KubeRayJob
2321
from nemo_run.run.ray.slurm import SlurmRayJob
2422

23+
# Import guard for Kubernetes dependencies
24+
try:
25+
from nemo_run.core.execution.kuberay import KubeRayExecutor
26+
from nemo_run.run.ray.kuberay import KubeRayJob
27+
28+
_KUBERAY_AVAILABLE = True
29+
except ImportError:
30+
KubeRayExecutor = None
31+
KubeRayJob = None
32+
_KUBERAY_AVAILABLE = False
33+
2534

2635
@dataclass(kw_only=True)
2736
class RayJob:
2837
"""Backend-agnostic convenience wrapper around Ray *jobs*."""
2938

30-
BACKEND_MAP = {
31-
KubeRayExecutor: KubeRayJob,
32-
SlurmExecutor: SlurmRayJob,
33-
}
34-
3539
name: str
3640
executor: Executor
3741
pre_ray_start_commands: Optional[list[str]] = None
3842

3943
def __post_init__(self) -> None: # noqa: D401 – simple implementation
40-
if self.executor.__class__ not in self.BACKEND_MAP:
44+
backend_map: dict[Type[Executor], Type[Any]] = {
45+
SlurmExecutor: SlurmRayJob,
46+
}
47+
48+
if _KUBERAY_AVAILABLE and KubeRayExecutor is not None and KubeRayJob is not None:
49+
backend_map[KubeRayExecutor] = KubeRayJob
50+
51+
if self.executor.__class__ not in backend_map:
4152
raise ValueError(f"Unsupported executor: {self.executor.__class__}")
4253

43-
self.backend = self.BACKEND_MAP[self.executor.__class__](
44-
name=self.name, executor=self.executor
45-
)
54+
backend_cls = backend_map[self.executor.__class__]
55+
self.backend = backend_cls(name=self.name, executor=self.executor)
4656

4757
# ------------------------------------------------------------------
4858
# Public API

nemo_run/run/torchx_backend/packaging.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import fiddle._src.experimental.dataclasses as fdl_dc
2222
from torchx import specs
2323

24-
from nemo_run.config import SCRIPTS_DIR, Partial, Script
24+
from nemo_run.config import SCRIPTS_DIR, USE_WITH_RAY_CLUSTER_KEY, Partial, Script
2525
from nemo_run.core.execution.base import Executor
2626
from nemo_run.core.execution.dgxcloud import DGXCloudExecutor
2727
from nemo_run.core.execution.launcher import FaultTolerance, Torchrun
@@ -30,7 +30,6 @@
3030
from nemo_run.core.execution.slurm import SlurmExecutor
3131
from nemo_run.core.serialization.yaml import YamlSerializer
3232
from nemo_run.core.serialization.zlib_json import ZlibJSONSerializer
33-
from nemo_run.run.ray.cluster import USE_WITH_RAY_CLUSTER_KEY
3433
from nemo_run.run.torchx_backend.components import ft_launcher, torchrun
3534

3635
log: logging.Logger = logging.getLogger(__name__)

nemo_run/run/torchx_backend/schedulers/slurm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,11 @@
5050
)
5151
from torchx.specs.api import is_terminal
5252

53-
from nemo_run.config import RUNDIR_NAME, from_dict, get_nemorun_home
53+
from nemo_run.config import RUNDIR_NAME, USE_WITH_RAY_CLUSTER_KEY, from_dict, get_nemorun_home
5454
from nemo_run.core.execution.base import Executor
5555
from nemo_run.core.execution.slurm import SlurmBatchRequest, SlurmExecutor, SlurmJobDetails
5656
from nemo_run.core.tunnel.client import LocalTunnel, PackagingJob, SSHTunnel, Tunnel
5757
from nemo_run.run import experiment as run_experiment
58-
from nemo_run.run.ray.cluster import USE_WITH_RAY_CLUSTER_KEY
5958
from nemo_run.run.ray.slurm import SlurmRayRequest
6059
from nemo_run.run.torchx_backend.schedulers.api import SchedulerMixin
6160

0 commit comments

Comments
 (0)