Skip to content

Commit aff2052

Browse files
committed
Make NEMORUN_HOME configurable
Signed-off-by: Hemil Desai <hemild@nvidia.com>
1 parent e70f109 commit aff2052

File tree

12 files changed

+66
-39
lines changed

12 files changed

+66
-39
lines changed

src/nemo_run/cli/api.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,13 @@
5656
from nemo_run.cli import devspace as devspace_cli
5757
from nemo_run.cli import experiment as experiment_cli
5858
from nemo_run.cli.cli_parser import parse_cli_args, parse_factory
59-
from nemo_run.config import NEMORUN_HOME, Config, Partial, get_type_namespace, get_underlying_types
59+
from nemo_run.config import (
60+
Config,
61+
Partial,
62+
get_nemorun_home,
63+
get_type_namespace,
64+
get_underlying_types,
65+
)
6066
from nemo_run.core.execution import LocalExecutor, SkypilotExecutor, SlurmExecutor
6167
from nemo_run.core.execution.base import Executor
6268
from nemo_run.core.frontend.console.styles import BOX_STYLE, TABLE_STYLES
@@ -758,7 +764,7 @@ def _search_workspace_file() -> str | None:
758764
file_names = [
759765
"workspace_private.py",
760766
"workspace.py",
761-
os.path.join(NEMORUN_HOME, "workspace.py"),
767+
os.path.join(get_nemorun_home(), "workspace.py"),
762768
]
763769

764770
while True:

src/nemo_run/config.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,33 @@
4444
_BuildableT = TypeVar("_BuildableT", bound=fdl.Buildable)
4545

4646
RECURSIVE_TYPES = (typing.Union, typing.Optional)
47-
NEMORUN_HOME = os.environ.get("NEMORUN_HOME", os.path.expanduser("~/.nemo_run"))
47+
_NEMORUN_HOME = os.environ.get("NEMORUN_HOME", os.path.expanduser("~/.nemo_run"))
4848
RUNDIR_NAME = "nemo_run"
4949
RUNDIR_SPECIAL_NAME = "/$nemo_run"
5050
SCRIPTS_DIR = "scripts"
5151

5252

53+
def get_nemorun_home() -> str:
54+
"""
55+
Get the current NEMORUN_HOME directory path.
56+
57+
Returns:
58+
The path to the NEMORUN_HOME directory.
59+
"""
60+
return _NEMORUN_HOME
61+
62+
63+
def set_nemorun_home(path: str) -> None:
64+
"""
65+
Set the NEMORUN_HOME directory path.
66+
67+
Args:
68+
path: The new path for NEMORUN_HOME. A leading `~` is expanded to the user's home directory.
69+
"""
70+
global _NEMORUN_HOME
71+
_NEMORUN_HOME = os.path.expanduser(path)
72+
73+
5374
def get_type_namespace(typ: Type | Callable) -> str:
5475
"""
5576
Get the namespace of a type or callable.

src/nemo_run/core/execution/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from torchx.specs import Role
2525
from typing_extensions import Self
2626

27-
from nemo_run.config import NEMORUN_HOME, ConfigurableMixin
27+
from nemo_run.config import ConfigurableMixin, get_nemorun_home
2828
from nemo_run.core.execution.launcher import LAUNCHER_MAP, Launcher
2929
from nemo_run.core.packaging.base import Packager
3030

@@ -201,25 +201,25 @@ def import_executor(
201201
202202
Example:
203203
executor = import_executor("local", file_path="path/to/executors.py")
204-
executor = import_executor("gpu") # Uses the default location of os.path.join(NEMORUN_HOME, "executors.py")
204+
executor = import_executor("gpu") # Uses the default location of os.path.join(get_nemorun_home(), "executors.py")
205205
206206
Args:
207207
name (str): The name of the executor to retrieve.
208208
file_path (Optional[str]): The path to the Python file containing the executor definitions.
209-
Defaults to None, in which case the default location of os.path.join(NEMORUN_HOME, "executors.py") is used.
209+
Defaults to None, in which case the default location of os.path.join(get_nemorun_home(), "executors.py") is used.
210210
211211
The file_path is expected to be a string representing a file path with the following structure:
212212
- It should be a path to a Python file (with a .py extension).
213213
- The file should contain a dictionary named `EXECUTOR_MAP` that maps executor names to their corresponding instances.
214-
- The file can be located anywhere in the file system, but if not provided, it defaults to `NEMORUN_HOME/executors.py`.
214+
- The file can be located anywhere in the file system, but if not provided, it defaults to `executors.py` inside the directory returned by `get_nemorun_home()`.
215215
call (bool): If True, the value from the module is called with the rest of the given kwargs.
216216
217217
Returns:
218218
Executor: The executor instance corresponding to the given name.
219219
"""
220220

221221
if not file_path:
222-
file_path = os.path.join(NEMORUN_HOME, "executors.py")
222+
file_path = os.path.join(get_nemorun_home(), "executors.py")
223223

224224
spec = importlib.util.spec_from_file_location("executors", file_path)
225225
assert spec

src/nemo_run/core/execution/dgxcloud.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def assign(
234234
== os.path.abspath(x["path"]),
235235
self.pvcs,
236236
)
237-
), f"Need to specify atleast one PVC containing {self.job_dir}.\nTo update job dir to a PVC path, you can set the NEMORUN_HOME env var."
237+
), f"Need to specify atleast one PVC containing {self.job_dir}.\nTo update job dir to a PVC path, you can use set_nemorun_home() or the NEMORUN_HOME env var."
238238

239239
def package(self, packager: Packager, job_name: str):
240240
assert self.experiment_id, "Executor not assigned to an experiment."

src/nemo_run/core/execution/docker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
parse_app_handle,
3232
)
3333

34-
from nemo_run.config import NEMORUN_HOME, RUNDIR_NAME
34+
from nemo_run.config import RUNDIR_NAME, get_nemorun_home
3535
from nemo_run.core.execution.base import Executor
3636
from nemo_run.core.packaging.base import Packager
3737
from nemo_run.core.packaging.git import GitArchivePackager
@@ -50,7 +50,7 @@
5050
fcntl = None
5151
FCNTL_AVAILABLE = False
5252

53-
DOCKER_JOB_DIRS = os.path.join(NEMORUN_HOME, ".docker_jobs.json")
53+
DOCKER_JOB_DIRS = os.path.join(get_nemorun_home(), ".docker_jobs.json")
5454
NETWORK = "nemo_run"
5555

5656
LABEL_EXPERIMENT_ID: str = "nemo-run/experiment-id"

src/nemo_run/core/tunnel/client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@
3232
from invoke.context import Context
3333
from invoke.runners import Result as RunResult
3434

35-
from nemo_run.config import NEMORUN_HOME, ConfigurableMixin
35+
from nemo_run.config import ConfigurableMixin, get_nemorun_home
3636
from nemo_run.core.frontend.console.api import CONSOLE
3737

3838
logger: logging.Logger = logging.getLogger(__name__)
3939
TUNNEL_DIR = ".tunnels"
40-
TUNNEL_FILE_SUBPATH = os.path.join(NEMORUN_HOME, TUNNEL_DIR)
40+
TUNNEL_FILE_SUBPATH = os.path.join(get_nemorun_home(), TUNNEL_DIR)
4141

4242

4343
def delete_tunnel_dir(file_path):
@@ -136,7 +136,7 @@ def __post_init__(self):
136136

137137
def _set_job_dir(self, experiment_id: str):
138138
experiment_title, _, _ = experiment_id.rpartition("_")
139-
base_job_dir = self.job_dir or os.path.join(NEMORUN_HOME, "experiments")
139+
base_job_dir = self.job_dir or os.path.join(get_nemorun_home(), "experiments")
140140
job_dir = os.path.join(base_job_dir, experiment_title, experiment_id)
141141
self.job_dir = job_dir
142142

src/nemo_run/run/experiment.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@
4141

4242
import nemo_run as run
4343
from nemo_run.config import (
44-
NEMORUN_HOME,
4544
Config,
4645
ConfigurableMixin,
4746
Partial,
4847
Script,
48+
get_nemorun_home,
4949
get_type_namespace,
5050
)
5151
from nemo_run.core.execution.base import Executor
@@ -101,7 +101,7 @@ class Experiment(ConfigurableMixin):
101101
The design is heavily inspired from `XManager <https://github.com/google-deepmind/xmanager/blob/main/docs/xm_launch_api_principles.md>`_.
102102
103103
Under the hood, the Experiment metadata is stored in the local filesystem
104-
inside a user specified directory controlled by NEMORUN_HOME env var.
104+
inside a user-specified directory controlled by the NEMORUN_HOME env var or set_nemorun_home().
105105
We will explore making the metadata more persistent in the future.
106106
107107
.. note::
@@ -206,9 +206,9 @@ def catalog(
206206
title: str = "",
207207
) -> list[str]:
208208
"""
209-
List all experiments inside NEMORUN_HOME, optionally with the provided title.
209+
List all experiments inside the NEMORUN_HOME directory (as returned by get_nemorun_home()), optionally with the provided title.
210210
"""
211-
parent_dir = os.path.join(NEMORUN_HOME, "experiments", title)
211+
parent_dir = os.path.join(get_nemorun_home(), "experiments", title)
212212
return _get_sorted_dirs(parent_dir)
213213

214214
@classmethod
@@ -247,7 +247,7 @@ def from_id(
247247
Reconstruct an experiment with the specified id.
248248
"""
249249
title, _, _ = id.rpartition("_")
250-
parent_dir = os.path.join(NEMORUN_HOME, "experiments", title)
250+
parent_dir = os.path.join(get_nemorun_home(), "experiments", title)
251251
exp_dir = os.path.join(parent_dir, id)
252252

253253
assert os.path.isdir(exp_dir), f"Experiment {id} not found."
@@ -263,7 +263,7 @@ def from_title(
263263
"""
264264
Reconstruct an experiment with the specified title.
265265
"""
266-
parent_dir = os.path.join(NEMORUN_HOME, "experiments", title)
266+
parent_dir = os.path.join(get_nemorun_home(), "experiments", title)
267267
exp_dir = _get_latest_dir(parent_dir)
268268

269269
assert os.path.isdir(exp_dir), f"Experiment {id} not found."
@@ -303,7 +303,7 @@ def __init__(
303303
self._title = title
304304
self._id = id or f"{title}_{int(time.time())}"
305305

306-
base_dir = base_dir or NEMORUN_HOME
306+
base_dir = base_dir or get_nemorun_home()
307307
self._exp_dir = os.path.join(base_dir, "experiments", title, self._id)
308308

309309
self.log_level = log_level
@@ -967,7 +967,7 @@ def reset(self) -> "Experiment":
967967

968968
old_id, old_exp_dir, old_launched = self._id, self._exp_dir, self._launched
969969
self._id = f"{self._title}_{int(time.time())}"
970-
self._exp_dir = os.path.join(NEMORUN_HOME, "experiments", self._title, self._id)
970+
self._exp_dir = os.path.join(get_nemorun_home(), "experiments", self._title, self._id)
971971
self._launched = False
972972
self._live_progress = None
973973

@@ -1017,7 +1017,7 @@ def reset(self) -> "Experiment":
10171017
f"[bold magenta]Failed resetting Experiment {self._id} due to error: {e}"
10181018
)
10191019
# Double check exp dir is unchanged
1020-
new_path = os.path.join(NEMORUN_HOME, "experiments", self._title, self._id)
1020+
new_path = os.path.join(get_nemorun_home(), "experiments", self._title, self._id)
10211021
if self._exp_dir == new_path and new_path != old_exp_dir:
10221022
shutil.rmtree(self._exp_dir)
10231023

src/nemo_run/run/torchx_backend/schedulers/dgxcloud.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from pathlib import Path
88
from typing import Any, Optional
99

10+
import fiddle as fdl
11+
import fiddle._src.experimental.dataclasses as fdl_dc
1012
from torchx.schedulers.api import (
1113
AppDryRunInfo,
1214
DescribeAppResponse,
@@ -22,16 +24,14 @@
2224
runopts,
2325
)
2426

25-
import fiddle as fdl
26-
import fiddle._src.experimental.dataclasses as fdl_dc
27-
from nemo_run.config import NEMORUN_HOME
27+
from nemo_run.config import get_nemorun_home
2828
from nemo_run.core.execution.base import Executor
2929
from nemo_run.core.execution.dgxcloud import DGXCloudExecutor, DGXCloudState
3030
from nemo_run.core.serialization.zlib_json import ZlibJSONSerializer
3131
from nemo_run.run.torchx_backend.schedulers.api import SchedulerMixin
3232

3333
# Local placeholder for storing DGX job states
34-
DGX_JOB_DIRS = os.path.join(NEMORUN_HOME, ".dgx_jobs.json")
34+
DGX_JOB_DIRS = os.path.join(get_nemorun_home(), ".dgx_jobs.json")
3535

3636
# Example mapping from some DGX statuses to the TorchX AppState
3737
DGX_STATES: dict[DGXCloudState, AppState] = {

src/nemo_run/run/torchx_backend/schedulers/local.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
)
4747
from torchx.specs.api import AppDef, AppState, Role
4848

49-
from nemo_run.config import NEMORUN_HOME
49+
from nemo_run.config import get_nemorun_home
5050
from nemo_run.core.execution.base import Executor
5151
from nemo_run.core.execution.local import LocalExecutor
5252
from nemo_run.run.torchx_backend.schedulers.api import SchedulerMixin
@@ -59,7 +59,7 @@
5959
fcntl = None
6060
FCNTL_AVAILABLE = False
6161

62-
LOCAL_JOB_DIRS = os.path.join(NEMORUN_HOME, ".local_jobs.json")
62+
LOCAL_JOB_DIRS = os.path.join(get_nemorun_home(), ".local_jobs.json")
6363

6464

6565
class PersistentLocalScheduler(SchedulerMixin, LocalScheduler): # type: ignore

src/nemo_run/run/torchx_backend/schedulers/skypilot.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
runopts,
4343
)
4444

45-
from nemo_run.config import NEMORUN_HOME
45+
from nemo_run.config import get_nemorun_home
4646
from nemo_run.core.execution.base import Executor
4747
from nemo_run.core.execution.skypilot import _SKYPILOT_AVAILABLE, SkypilotExecutor
4848
from nemo_run.run.torchx_backend.schedulers.api import SchedulerMixin
@@ -74,7 +74,7 @@
7474
...
7575

7676
log: logging.Logger = logging.getLogger(__name__)
77-
SKYPILOT_JOB_DIRS = os.path.join(NEMORUN_HOME, ".skypilot_jobs.json")
77+
SKYPILOT_JOB_DIRS = os.path.join(get_nemorun_home(), ".skypilot_jobs.json")
7878

7979

8080
@dataclass

0 commit comments

Comments
 (0)