Skip to content

Commit 1899f52

Browse files
committed
fix: issue a proper warning if the cancel command fails in a multicluster setup
1 parent d7bda79 commit 1899f52

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

snakemake_executor_plugin_slurm/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -812,14 +812,14 @@ def cancel_jobs(self, active_jobs: List[SubmittedJobInfo]):
812812
if active_jobs:
813813
# TODO chunk jobids in order to avoid too long command lines
814814
jobids = " ".join([job_info.external_jobid for job_info in active_jobs])
815-
815+
816816
try:
817817
# timeout set to 60, because a scheduler cycle usually is
818818
# about 30 sec, but can be longer in extreme cases.
819819
# Under 'normal' circumstances, 'scancel' is executed in
820820
# virtually no time.
821821
scancel_command = f"scancel {jobids}"
822-
822+
823823
# Add cluster specification if any clusters were found during submission
824824
if self._submitted_job_clusters:
825825
clusters_str = ",".join(sorted(self._submitted_job_clusters))
@@ -838,6 +838,16 @@ def cancel_jobs(self, active_jobs: List[SubmittedJobInfo]):
838838
msg = e.stderr.strip()
839839
if msg:
840840
msg = f": {msg}"
841+
# If we were using --clusters and it failed, provide additional context
842+
if self._submitted_job_clusters:
843+
msg += (
844+
"\nWARNING: Job cancellation failed while using "
845+
"--clusters flag. Your multicluster SLURM setup may not "
846+
"support this feature, or the SLURM database may not be "
847+
"properly configured for multicluster operations. "
848+
"Please verify your SLURM configuration with your "
849+
"HPC administrator."
850+
)
841851
raise WorkflowError(
842852
"Unable to cancel jobs with scancel "
843853
f"(exit code {e.returncode}){msg}"

0 commit comments

Comments
 (0)