Skip to content

Commit a46de33

Browse files
committed
Fix
Signed-off-by: Hemil Desai <[email protected]>
1 parent c1d29b5 commit a46de33

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

src/nemo_run/core/execution/slurm.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -816,7 +816,7 @@ def materialize(self) -> str:
816816
)
817817
het_parameters.update(
818818
{
819-
"job_name": f"{job_name_prefix}{self.jobs[i]}",
819+
"job_name": f"{job_details.job_name[:-2] if job_details.job_name.endswith('-0') else job_details.job_name}-{i}",
820820
"nodes": resource_req.nodes,
821821
"ntasks_per_node": resource_req.ntasks_per_node,
822822
"gpus_per_node": resource_req.gpus_per_node,
@@ -995,7 +995,7 @@ def get_container_flags(
995995
return sbatch_script
996996

997997
def __repr__(self) -> str:
998-
return f"""{' '.join(self.cmd + ['$SBATCH_SCRIPT'])}
998+
return f"""{" ".join(self.cmd + ["$SBATCH_SCRIPT"])}
999999
10001000
#----------------
10011001
# SBATCH_SCRIPT

test/core/execution/test_slurm.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -691,3 +691,28 @@ def test_het_job_name_prefix(self, het_slurm_request_with_artifact):
691691
for job in het_request.jobs:
692692
expected = f"prefix_{job}"
693693
assert expected in sbatch_script, f"Expected job name '{expected}' not found in script"
694+
695+
def test_het_job_custom_details_job_name(self, het_slurm_request_with_artifact):
696+
# Test that the job name from CustomJobDetails is used for heterogeneous slurm requests
697+
from nemo_run.core.execution.slurm import SlurmJobDetails
698+
699+
het_request, _ = het_slurm_request_with_artifact
700+
701+
class CustomJobDetails(SlurmJobDetails):
702+
@property
703+
def stdout(self):
704+
assert self.folder
705+
return Path(self.folder) / "sbatch_job.out"
706+
707+
@property
708+
def srun_stdout(self):
709+
assert self.folder
710+
return Path(self.folder) / "log_job.out"
711+
712+
custom_name = "custom_het_job"
713+
het_request.slurm_config.job_details = CustomJobDetails(
714+
job_name=custom_name, folder="/custom_folder"
715+
)
716+
sbatch_script = het_request.materialize()
717+
for i in range(len(het_request.jobs)):
718+
assert f"#SBATCH --job-name={custom_name}-{i}" in sbatch_script

0 commit comments

Comments
 (0)