Skip to content

Commit d6745b0

Browse files
Sbozzolo, pearce8, and scheibelp
authored
Add support for PBS/Torque scheduler (#1159)
* Add support for PBS/Torque scheduler
* Improve PBS support
* Address comments

Co-authored-by: pearce8 <pearce8@llnl.gov>
Co-authored-by: Peter Scheibel <scheibel1@llnl.gov>
1 parent 8e6415c commit d6745b0

File tree

1 file changed

+44
-0
lines changed

1 file changed

+44
-0
lines changed

modifiers/allocation/modifier.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ def determine_allocation(self, v):
292292
"Experiment requests GPUs, but sys_gpus_per_node "
293293
"is not specified for the system"
294294
)
295+
295296
v.n_nodes = max(cores_node_request or 0, gpus_node_request or 0)
296297

297298
if not v.n_threads_per_proc:
@@ -466,13 +467,56 @@ def pjm_instructions(self, v):
466467
v.batch_submit = "pjsub {execute_experiment}"
467468
v.allocation_directives = "\n".join(batch_directives)
468469

470+
def pbs_instructions(self, v):
    """Populate *v* with PBS/Torque batch directives and launch commands.

    Builds a ``-l select=...`` resource chunk from the node, rank, thread,
    and GPU counts on *v*, appends queue (``-q``), walltime
    (``-l walltime=``), and bank (``-A``) options, and finally sets
    ``v.mpi_command`` (mpiexec), ``v.batch_submit`` (qsub), and
    ``v.allocation_directives`` (``#PBS`` lines, newline-joined).
    """
    batch_opts, cmd_opts = Allocation._init_batch_and_cmd_opts(v)

    # Derive ranks-per-node when the experiment did not specify it.
    if not v.n_ranks_per_node:
        v.n_ranks_per_node = math.ceil(v.n_ranks / v.n_nodes)

    node_spec = [f"select={v.n_nodes}"]
    node_spec.append(f"mpiprocs={v.n_ranks_per_node}")

    if v.n_ranks:
        cmd_opts.append(f"-np {v.n_ranks}")

    # Only emit ompthreads when it is set and differs from the default of 1.
    if v.n_threads_per_proc and v.n_threads_per_proc != 1:
        node_spec.append(f"ompthreads={v.n_threads_per_proc}")

    # Fix: the guard above shows n_threads_per_proc may be unset (falsy);
    # multiplying by None raised TypeError and by 0 requested ncpus=0.
    # Treat "unset" as one thread per rank, matching the guard's intent.
    n_cpus_per_node = v.n_ranks_per_node * (v.n_threads_per_proc or 1)
    node_spec.append(f"ncpus={n_cpus_per_node}")

    if v.n_gpus:
        # NOTE(review): gpus_as_gpus_per_rank suggests a per-rank count,
        # while PBS `gpus=` inside a select chunk is per-chunk (per-node)
        # — confirm the intended semantics against the helper.
        gpus_per_rank = self.gpus_as_gpus_per_rank(v.n_gpus)
        node_spec.append(f"gpus={gpus_per_rank}")

    # node_spec always contains at least select= and mpiprocs=, so the
    # else branch is defensive only and is not reachable in practice.
    if node_spec:
        batch_opts.append(f"-l {':'.join(node_spec)}")
    else:
        raise ValueError("Not enough information to select resources")

    if v.queue:
        batch_opts.append(f"-q {v.queue}")

    if v.timeout:
        batch_opts.append(f"-l walltime={TimeFormat.as_hhmmss(v.timeout)}")

    if v.bank:
        batch_opts.append(f"-A {v.bank}")

    batch_directives = [f"#PBS {x}" for x in batch_opts]

    v.mpi_command = f"mpiexec {' '.join(cmd_opts)}"
    v.batch_submit = "qsub {execute_experiment}"
    v.allocation_directives = "\n".join(batch_directives)
469512
def determine_scheduler_instructions(self, v):
470513
handler = {
471514
"slurm": self.slurm_instructions,
472515
"flux": self.flux_instructions,
473516
"mpi": self.mpi_instructions,
474517
"lsf": self.lsf_instructions,
475518
"pjm": self.pjm_instructions,
519+
"pbs": self.pbs_instructions,
476520
}
477521
if v.scheduler not in handler:
478522
raise ValueError(

0 commit comments

Comments
 (0)