Skip to content

Commit 1d74c5b

Browse files
committed
Fixes
Signed-off-by: Hemil Desai <[email protected]>
1 parent 3906fa2 commit 1d74c5b

File tree

1 file changed

+13
-2
lines changed

1 file changed

+13
-2
lines changed

nemo_run/run/ray/slurm.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -334,8 +334,6 @@ def create_ray_cluster(
334 334

335 335
return job_id
336 336

337-
return None
338-
339 337
def schedule_ray_job(
340 338
self,
341 339
name: str,
@@ -413,6 +411,19 @@ def schedule_ray_job(
413 411
command=command,
414 412
workdir=remote_workdir,
415 413
)
414+
415+
# Descriptive log for the user with useful paths / identifiers
416+
cluster_dir = os.path.join(executor.tunnel.job_dir, name)
417+
logger.info(
418+
f"""\n\n\033[1;34mRay job submitted to Slurm cluster at {executor.tunnel.key}:\033[0m
419+
\033[1mJob ID\033[0m : \033[32m{job_id}\033[0m
420+
\033[1mCluster dir\033[0m : {cluster_dir}
421+
\033[1mLogs directory\033[0m : {os.path.join(cluster_dir, "logs")}
422+
\033[1mSBATCH script\033[0m : {os.path.join(cluster_dir, "ray.sub")}
423+
\033[1mRemote workdir\033[0m : {remote_workdir}
424+
(use `squeue -j {job_id}` to check status, `scancel {job_id}` to cancel)\n"""
425+
)
426+
416 427
return job_id
417 428

418 429
def wait_until_ray_cluster_running(

0 commit comments

Comments
 (0)