Skip to content

Commit 0be1693

Browse files
committed
torchrun_job
Signed-off-by: oliver könig <[email protected]>
1 parent 02efa9f commit 0be1693

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

nemo_run/run/torchx_backend/schedulers/dgxcloud.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def _submit_dryrun( # type: ignore
 120 120   )
 121 121
 122 122   # Write and copy sbatch script
 123     - path = os.path.join(executor.experiment_dir, f"{executor.job_name}_job.sh")
     123 + path = os.path.join(executor.experiment_dir, "torchrun_job.sh")
 124 124   script = req.materialize()
 125 125
 126 126   with open(path, "w") as f:
@@ -145,7 +145,7 @@ def schedule(self, dryrun_info: AppDryRunInfo[DGXRequest]) -> str:
 145 145
 146 146   # The DGXExecutor's launch call typically returns (job_id, handle).
 147 147   # We'll call it without additional parameters here.
 148     - cmd = os.path.join(executor.experiment_dir, f"{executor.job_name}_job.sh")
     148 + cmd = os.path.join(executor.experiment_dir, "torchrun_job.sh")
 149 149   req.launch_cmd = ["bash", cmd]
 150 150   job_id, status = executor.launch(name=req.name, cmd=req.launch_cmd)
 151 151   if not job_id:

0 commit comments

Comments
 (0)