@@ -70,6 +70,9 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
7070 Slurm queue to place job in. If unspecified or ``None``, no queue slurm directive will be specified.
7171 constraint : str
7272 Slurm job constraint, often used to choose cpu or gpu type. If unspecified or ``None``, no constraint slurm directive will be added.
73+ clusters : str
74+ Slurm cluster name, or comma-separated cluster list, used to choose between different clusters in a federated Slurm instance.
75+ If unspecified or ``None``, no slurm directive for clusters will be added.
7376 channel : Channel
7477 Channel for accessing this provider.
7578 nodes_per_block : int
@@ -116,6 +119,7 @@ def __init__(self,
116119 account : Optional [str ] = None ,
117120 qos : Optional [str ] = None ,
118121 constraint : Optional [str ] = None ,
122+ clusters : Optional [str ] = None ,
119123 channel : Channel = LocalChannel (),
120124 nodes_per_block : int = 1 ,
121125 cores_per_node : Optional [int ] = None ,
@@ -152,6 +156,7 @@ def __init__(self,
152156 self .account = account
153157 self .qos = qos
154158 self .constraint = constraint
159+ self .clusters = clusters
155160 self .scheduler_options = scheduler_options + '\n '
156161 if exclusive :
157162 self .scheduler_options += "#SBATCH --exclusive\n "
@@ -163,6 +168,8 @@ def __init__(self,
163168 self .scheduler_options += "#SBATCH --qos={}\n " .format (qos )
164169 if constraint :
165170 self .scheduler_options += "#SBATCH --constraint={}\n " .format (constraint )
171+ if clusters :
172+ self .scheduler_options += "#SBATCH --clusters={}\n " .format (clusters )
166173
167174 self .regex_job_id = regex_job_id
168175 self .worker_init = worker_init + '\n '
@@ -174,14 +181,22 @@ def __init__(self,
174181 logger .debug (f"sacct returned retcode={ retcode } stderr={ stderr } " )
175182 if retcode == 0 :
176183 logger .debug ("using sacct to get job status" )
184+ _cmd = "sacct"
185+ # Add clusters option to sacct if provided
186+ if self .clusters :
187+ _cmd += f" --clusters={ self .clusters } "
177188 # Using state%20 to get enough characters to not truncate output
178189 # of the state. Without output can look like "<job_id> CANCELLED+"
179- self ._cmd = "sacct -X --noheader --format=jobid,state%20 --job '{0}'"
190+ self ._cmd = _cmd + " -X --noheader --format=jobid,state%20 --job '{0}'"
180191 self ._translate_table = sacct_translate_table
181192 else :
182193 logger .debug (f"sacct failed with retcode={ retcode } " )
183194 logger .debug ("falling back to using squeue to get job status" )
184- self ._cmd = "squeue --noheader --format='%i %t' --job '{0}'"
195+ _cmd = "squeue"
196+ # Add clusters option to squeue if provided
197+ if self .clusters :
198+ _cmd += f" --clusters={ self .clusters } "
199+ self ._cmd = _cmd + " --noheader --format='%i %t' --job '{0}'"
185200 self ._translate_table = squeue_translate_table
186201
187202 def _status (self ):
@@ -344,7 +359,14 @@ def cancel(self, job_ids):
344359 '''
345360
346361 job_id_list = ' ' .join (job_ids )
347- retcode , stdout , stderr = self .execute_wait ("scancel {0}" .format (job_id_list ))
362+
363+ # Make the command to cancel jobs
364+ _cmd = "scancel"
365+ if self .clusters :
366+ _cmd += f" --clusters={ self .clusters } "
367+ _cmd += " {0}"
368+
369+ retcode , stdout , stderr = self .execute_wait (_cmd .format (job_id_list ))
348370 rets = None
349371 if retcode == 0 :
350372 for jid in job_ids :
0 commit comments