Skip to content

Commit 5cb58d1

Browse files
tylern4 (Nicholas Tyler) authored, with co-authors
Adds clusters option for slurm (#3694)
# Description Adds the `clusters` option from slurm for sites with [multi-cluster slurm](https://slurm.schedmd.com/multi_cluster.html) setups. # Changed Behaviour Users on federated Slurm clusters will be able to schedule tasks between different clusters within the same slurm instance. # Fixes Fixes #3675 ## Type of change - New feature --------- Co-authored-by: Nicholas Tyler <[email protected]>
1 parent 92ab47f commit 5cb58d1

File tree

1 file changed

+25
-3
lines changed

1 file changed

+25
-3
lines changed

parsl/providers/slurm/slurm.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
7070
Slurm queue to place job in. If unspecified or ``None``, no queue slurm directive will be specified.
7171
constraint : str
7272
Slurm job constraint, often used to choose cpu or gpu type. If unspecified or ``None``, no constraint slurm directive will be added.
73+
clusters : str
74+
Slurm cluster name, or comma separated cluster list, used to choose between different clusters in a federated Slurm instance.
75+
If unspecified or ``None``, no slurm directive for clusters will be added.
7376
channel : Channel
7477
Channel for accessing this provider.
7578
nodes_per_block : int
@@ -116,6 +119,7 @@ def __init__(self,
116119
account: Optional[str] = None,
117120
qos: Optional[str] = None,
118121
constraint: Optional[str] = None,
122+
clusters: Optional[str] = None,
119123
channel: Channel = LocalChannel(),
120124
nodes_per_block: int = 1,
121125
cores_per_node: Optional[int] = None,
@@ -152,6 +156,7 @@ def __init__(self,
152156
self.account = account
153157
self.qos = qos
154158
self.constraint = constraint
159+
self.clusters = clusters
155160
self.scheduler_options = scheduler_options + '\n'
156161
if exclusive:
157162
self.scheduler_options += "#SBATCH --exclusive\n"
@@ -163,6 +168,8 @@ def __init__(self,
163168
self.scheduler_options += "#SBATCH --qos={}\n".format(qos)
164169
if constraint:
165170
self.scheduler_options += "#SBATCH --constraint={}\n".format(constraint)
171+
if clusters:
172+
self.scheduler_options += "#SBATCH --clusters={}\n".format(clusters)
166173

167174
self.regex_job_id = regex_job_id
168175
self.worker_init = worker_init + '\n'
@@ -174,14 +181,22 @@ def __init__(self,
174181
logger.debug(f"sacct returned retcode={retcode} stderr={stderr}")
175182
if retcode == 0:
176183
logger.debug("using sacct to get job status")
184+
_cmd = "sacct"
185+
# Add clusters option to sacct if provided
186+
if self.clusters:
187+
_cmd += f" --clusters={self.clusters}"
177188
# Using state%20 to get enough characters to not truncate output
178189
# of the state. Without output can look like "<job_id> CANCELLED+"
179-
self._cmd = "sacct -X --noheader --format=jobid,state%20 --job '{0}'"
190+
self._cmd = _cmd + " -X --noheader --format=jobid,state%20 --job '{0}'"
180191
self._translate_table = sacct_translate_table
181192
else:
182193
logger.debug(f"sacct failed with retcode={retcode}")
183194
logger.debug("falling back to using squeue to get job status")
184-
self._cmd = "squeue --noheader --format='%i %t' --job '{0}'"
195+
_cmd = "squeue"
196+
# Add clusters option to squeue if provided
197+
if self.clusters:
198+
_cmd += f" --clusters={self.clusters}"
199+
self._cmd = _cmd + " --noheader --format='%i %t' --job '{0}'"
185200
self._translate_table = squeue_translate_table
186201

187202
def _status(self):
@@ -344,7 +359,14 @@ def cancel(self, job_ids):
344359
'''
345360

346361
job_id_list = ' '.join(job_ids)
347-
retcode, stdout, stderr = self.execute_wait("scancel {0}".format(job_id_list))
362+
363+
# Make the command to cancel jobs
364+
_cmd = "scancel"
365+
if self.clusters:
366+
_cmd += f" --clusters={self.clusters}"
367+
_cmd += " {0}"
368+
369+
retcode, stdout, stderr = self.execute_wait(_cmd.format(job_id_list))
348370
rets = None
349371
if retcode == 0:
350372
for jid in job_ids:

0 commit comments

Comments
 (0)