Skip to content

Commit f78e39e

Browse files
authored
Add CPU-only support for DGXCloudExecutor (#146)
1 parent f07f446 commit f78e39e

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

src/nemo_run/core/execution/dgxcloud.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@
 10  10    import requests
 11  11    from invoke.context import Context
 12  12
 13      - from nemo_run.core.execution.base import (
 14      -     Executor,
 15      -     ExecutorMacros,
 16      - )
     13  + from nemo_run.core.execution.base import Executor, ExecutorMacros
 17  14    from nemo_run.core.packaging.base import Packager
 18  15    from nemo_run.core.packaging.git import GitArchivePackager
 19  16

@@ -54,7 +51,8 @@ class DGXCloudExecutor(Executor):
 54  51        project_name: str
 55  52        container_image: str
 56  53        nodes: int = 1
 57      -     gpus_per_node: int = 8
     54  +     gpus_per_node: int = 0
     55  +     nprocs_per_node: int = 1
 58  56        pvcs: list[dict[str, Any]] = field(default_factory=list)
 59  57        distributed_framework: str = "PyTorch"
 60  58        custom_spec: dict[str, Any] = field(default_factory=dict)
@@ -160,7 +158,13 @@ def nnodes(self) -> int:
 160  158            return self.nodes
 161  159
 162  160        def nproc_per_node(self) -> int:
 163       -         return self.gpus_per_node
      161  +         # Default to the number of GPUs specified per node
      162  +         # If user doesn't want GPUs, can run multiple processes with CPU only
      163  +         if self.gpus_per_node:
      164  +             return self.gpus_per_node
      165  +         elif self.nprocs_per_node:
      166  +             return self.nprocs_per_node
      167  +         return 1
 164  168
 165  169        def status(self, job_id: str) -> Optional[DGXCloudState]:
 166  170            url = f"{self.base_url}/workloads/distributed/{job_id}"

0 commit comments

Comments
 (0)