Skip to content
Merged
Show file tree
Hide file tree
Changes from 73 commits
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
3abb561
feat: job arrays
johanneskoester Nov 29, 2024
5e8acca
Update snakemake_executor_plugin_slurm/__init__.py
johanneskoester Dec 20, 2024
f9cb5ad
Merge branch 'main' into feat/jobarrays
cmeesters Jan 25, 2025
6e3742c
Merge branch 'main' into feat/jobarrays
cmeesters Mar 7, 2025
72b12ad
Merge branch 'main' into feat/jobarrays
cmeesters Jan 22, 2026
cce6fac
Merge branch 'main' into feat/jobarrays
cmeesters Feb 13, 2026
dbc04de
fix: merge conflict
cmeesters Feb 25, 2026
8c580ec
feat: basic job wait implementation for arrays
cmeesters Feb 25, 2026
720147d
fix: formatting
cmeesters Feb 25, 2026
af558f7
feat: updated testing code
cmeesters Feb 25, 2026
1504763
fix: typo
cmeesters Feb 25, 2026
be01b04
fix: typo
cmeesters Feb 25, 2026
80e587c
fix: consider 'all' case - then all jobs are considered array jobs, i…
cmeesters Feb 25, 2026
cc6a84d
fix: async execution added.
cmeesters Feb 25, 2026
5d8cf01
refactor: submit_string woring for both arrays and ordinary jobs
cmeesters Feb 26, 2026
1358be1
feat: added two helper functions: pending_jobs_for_rule and get_job_w…
cmeesters Feb 26, 2026
6120c1b
feat: started implementation of array job submission
cmeesters Feb 26, 2026
8152280
feat: avoiding array limit by raising WorkflowError 'not implemented'
cmeesters Feb 26, 2026
25cd8d0
one step further in implementing arrays
cmeesters Feb 26, 2026
f97d708
fix: formatting
cmeesters Mar 4, 2026
97831ca
fix: jobid handling for array jobs
cmeesters Mar 6, 2026
83a1ba0
fix: submitting without stalling - not yet working. There is a block …
cmeesters Mar 6, 2026
ef477d0
proposed fix for https://github.com/snakemake/snakemake-executor-plug…
Mar 6, 2026
0cf6eed
fix: deadlock issue
cmeesters Mar 9, 2026
8d033ce
Merge branch 'main' of github.com:snakemake/snakemake-executor-plugin…
cmeesters Mar 9, 2026
ed1e50d
Merge branch 'main' of github.com:snakemake/snakemake-executor-plugin…
cmeesters Mar 9, 2026
009c164
fix: group job handling before array job handling to avoid api issue
cmeesters Mar 10, 2026
57485b8
docs: added array documentation
cmeesters Mar 10, 2026
c8fa068
docs: documented the slurm-arra-limit flag
cmeesters Mar 10, 2026
842753e
Fix small errors in further.md
fbartusch Mar 10, 2026
081ecca
test: added array testcases
cmeesters Mar 10, 2026
ee39c7c
Merge branch 'feat/jobarrays' of github.com:snakemake/snakemake-execu…
cmeesters Mar 10, 2026
bb91765
fix: merge conflict
cmeesters Mar 10, 2026
c5a92f8
Merge branch 'feat/jobarrays' of github.com:snakemake/snakemake-execu…
cmeesters Mar 10, 2026
6c1cbc3
fix: formatting
cmeesters Mar 10, 2026
eba3612
Merge branch 'feat/jobarrays' of github.com:snakemake/snakemake-execu…
cmeesters Mar 10, 2026
2b2ae25
fix: formatting
cmeesters Mar 10, 2026
c5d22e3
fix: typo
cmeesters Mar 10, 2026
ef6da07
fix: syntax (critical)
cmeesters Mar 10, 2026
938c1f2
Fix syntax. Add black format changes.
fbartusch Mar 10, 2026
3f7549a
fix: formatting
cmeesters Mar 10, 2026
a099a23
Merge branch 'feat/jobarrays' of github.com:snakemake/snakemake-execu…
cmeesters Mar 10, 2026
a80e0ed
feat: setting max array size by cluster limit - taking minimum of arr…
cmeesters Mar 10, 2026
6831182
refactor: get_max_array_size function due to stupid copilot overwrite
cmeesters Mar 10, 2026
78eaf2b
feat: added array limit validation
cmeesters Mar 10, 2026
c766ca5
refactor: array limit validation
cmeesters Mar 10, 2026
3eed042
refactor: logfile type conversion
cmeesters Mar 10, 2026
94e7a0e
fix: gnarf - wrote shutils.split - is shlex!
cmeesters Mar 11, 2026
e3e6ff9
fix: missing f-string for exec_job in shell handshake
cmeesters Mar 11, 2026
83f65ba
fix: better error message for failed array job submission
cmeesters Mar 11, 2026
74d79be
fix: tests for array jobs, deleted one useless tests (sbatch failures)
cmeesters Mar 11, 2026
329948b
fix: node exclusion is optional
cmeesters Mar 11, 2026
78ba68f
fix: deleted accidentially added backup file
cmeesters Mar 11, 2026
dad46b5
fix: node exclusion is optional
cmeesters Mar 11, 2026
ff3f39a
fix: more stable status checks, if CI file system issue
cmeesters Mar 11, 2026
9e1ec44
fix: better prefix for slurm logfiles for array jobs
cmeesters Mar 11, 2026
b2306d4
fix: deleted pending_jobs_for_rule - not used
cmeesters Mar 11, 2026
b90d19b
fix: formatting
cmeesters Mar 11, 2026
b5e9226
fix: removed unused import
cmeesters Mar 11, 2026
38a7ab5
fix: removed unused import
cmeesters Mar 11, 2026
03c91f1
fix: formatting
cmeesters Mar 11, 2026
18dc93f
fix: max array size is max array size -1 under SLURM
cmeesters Mar 11, 2026
1001ee7
fix: submission in chuncks working again - not blocking - encoding to…
cmeesters Mar 16, 2026
07a0cc6
fix: now accounting for lost tick mark
cmeesters Mar 16, 2026
ed03477
fix: restored behaviour for first job (not in array execs), now consi…
cmeesters Mar 16, 2026
f340640
tests: new snakefile for array job tests
cmeesters Mar 16, 2026
b1603fc
tests: new test cases for array jobs
cmeesters Mar 16, 2026
3471cfa
fix: restored previous function
cmeesters Mar 16, 2026
2c7244f
fix: formatting
cmeesters Mar 16, 2026
b2bd750
fix: formatting
cmeesters Mar 16, 2026
d4da0b1
fix: trailing whitespace
cmeesters Mar 16, 2026
da0536c
Merge branch 'main' into feat/jobarrays
cmeesters Mar 16, 2026
ae8e765
Fix: catch type error
fbartusch Mar 16, 2026
f8dce8e
feat: updated jobstep requirement (just merged on bioconda)
cmeesters Mar 18, 2026
be64f6d
fix: update snakemake dependency
cmeesters Mar 25, 2026
da28f81
fix: introducing a storage class to ensure the presence of a test sui…
cmeesters Mar 25, 2026
c94716e
Merge branch 'main' into feat/jobarrays
cmeesters Mar 25, 2026
9dcf4cc
fix: apparently the fixed stage-in of local test cases shadowed the o…
cmeesters Mar 25, 2026
2b1edf9
Merge branch 'feat/jobarrays' of github.com:snakemake/snakemake-execu…
cmeesters Mar 25, 2026
0569e7b
fix: adding a default method to get config settings (returns None)
cmeesters Mar 25, 2026
900ab1a
fix: the executor stalled after a local rule, because the previous im…
cmeesters Mar 25, 2026
494cd16
fix: changed two logger settings to debug in the array submission cod…
cmeesters Mar 25, 2026
bc47aa3
fix: one more status query timing message from info to debug
cmeesters Mar 25, 2026
94805ec
feat: stabilizing array submission with adding a memory fudge factor …
cmeesters Mar 26, 2026
36d680e
feat: version bump: jobstep plugin 0.6 is required - better memory ha…
cmeesters Mar 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@

### Performance Improvements

* performanc tweaks ([#417](https://github.com/snakemake/snakemake-executor-plugin-slurm/issues/417)) ([a3f6abf](https://github.com/snakemake/snakemake-executor-plugin-slurm/commit/a3f6abf47a1d3baa51b987fc0fcdb1972fc2bdd6))
* performance tweaks ([#417](https://github.com/snakemake/snakemake-executor-plugin-slurm/issues/417)) ([a3f6abf](https://github.com/snakemake/snakemake-executor-plugin-slurm/commit/a3f6abf47a1d3baa51b987fc0fcdb1972fc2bdd6))

## [2.3.1](https://github.com/snakemake/snakemake-executor-plugin-slurm/compare/v2.3.0...v2.3.1) (2026-02-20)

Expand Down
13 changes: 13 additions & 0 deletions docs/further.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,19 @@ This node tracking works regardless of whether the `--slurm-requeue` flag is ena
- **With `--slurm-requeue`**: SLURM will automatically requeue failed jobs, and they will be retried on different nodes
- **Without `--slurm-requeue`**: Failed jobs will be reported as errors, but future retries (via `--retries` or other retry mechanisms) will avoid the problematic nodes

#### SLURM Job Arrays

With `--slurm-array-jobs`, jobs can be submitted as SLURM job arrays. `--slurm-array-jobs=rule1,rule2,...` selects specific rules by name; the jobs of these rules are submitted as array jobs. Alternatively, `--slurm-array-jobs=all` will submit the jobs of all eligible rules as array jobs.

Note: group jobs cannot be array jobs.

.. note:: Using array jobs does impose a synchronization overhead (all jobs of a particular rule need to be ready for execution).

When submitting array jobs, the `--slurm-array-limit` flag defines the
maximum number of array tasks to be submitted in one job submission.
If the number of tasks exceeds this limit, multiple array job submissions will be performed. This is useful to avoid hitting cluster limits on the maximum number of array tasks per job. Please obey your cluster limits and set this flag accordingly.


#### MPI-specific Resources

Snakemake's SLURM executor plugin supports the execution of MPI ([Message Passing Interface](https://en.wikipedia.org/wiki/Message_Passing_Interface)) jobs.
Expand Down
588 changes: 538 additions & 50 deletions snakemake_executor_plugin_slurm/__init__.py

Large diffs are not rendered by default.

16 changes: 14 additions & 2 deletions snakemake_executor_plugin_slurm/job_cancellation.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,19 @@ def cancel_slurm_jobs(
"""
if active_jobs:
# TODO chunk jobids in order to avoid too long command lines
jobids = " ".join([job_info.external_jobid for job_info in active_jobs])
# Filter out None values in case some jobs haven't been assigned
# external IDs yet
jobids = " ".join(
[
job_info.external_jobid
for job_info in active_jobs
if job_info.external_jobid is not None
]
)

if not jobids:
# No valid job IDs to cancel
return

try:
# timeout set to 60, because a scheduler cycle usually is
Expand Down Expand Up @@ -68,5 +80,5 @@ def cancel_slurm_jobs(
"HPC administrator."
)
raise WorkflowError(
"Unable to cancel jobs with scancel " f"(exit code {e.returncode}){msg}"
f"Unable to cancel jobs with scancel (exit code {e.returncode}){msg}"
) from e
18 changes: 17 additions & 1 deletion snakemake_executor_plugin_slurm/submit_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ def safe_quote(value):
return shlex.quote(str_value)


def get_submit_command(job, params):
def get_submit_command(job, params, settings=None, failed_nodes=None) -> str:
"""
Return the submit command for the job.
"""
# Convert params dict to a SimpleNamespace for attribute-style access
params = SimpleNamespace(**params)

failed_nodes = failed_nodes or set()

call = (
"sbatch "
"--parsable "
Expand Down Expand Up @@ -76,6 +78,20 @@ def get_submit_command(job, params):
if job.resources.get("nodes", False):
call += f" --nodes={job.resources.get('nodes', 1)}"

if settings and settings.requeue:
call += " --requeue"

if settings and settings.qos:
call += f" --qos={safe_quote(settings.qos)}"

if settings and settings.reservation:
call += f" --reservation={safe_quote(settings.reservation)}"

# we exclude failed nodes from further job submissions, to avoid
# repeated failures.
if failed_nodes:
call += f" --exclude={','.join(failed_nodes)}"

gpu_job = job.resources.get("gpu") or "gpu" in job.resources.get("gres", "")
if gpu_job:
# fixes #316 - allow unsetting of tasks per gpu
Expand Down
65 changes: 65 additions & 0 deletions snakemake_executor_plugin_slurm/utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,82 @@
# utility functions for the SLURM executor plugin

from collections import Counter
import math
import os
import shlex
import subprocess
import re
from pathlib import Path
from typing import Union

from snakemake_interface_executor_plugins.dag import DAGExecutorInterface
from snakemake_interface_executor_plugins.jobs import (
JobExecutorInterface,
)
from snakemake_interface_common.exceptions import WorkflowError


def get_max_array_size() -> int:
    """
    Return the largest job-array size that can be submitted on this cluster.

    The limit is read from the ``MaxArraySize`` entry in the output of
    ``scontrol show config`` (stdout and stderr are both searched). The
    configured value minus one is returned, because SLURM limits array
    submissions to ``MaxArraySize - 1``.

    Returns:
        ``MaxArraySize - 1`` as an integer. If ``scontrol`` cannot be
        executed, does not finish within 5 seconds, or its output contains
        no parsable ``MaxArraySize`` entry, a ``MaxArraySize`` of 1000 is
        assumed, i.e. 999 is returned.
    """
    # Assumed MaxArraySize when the cluster configuration cannot be queried.
    default_max_array_size = 1000

    try:
        res = subprocess.run(
            shlex.split("scontrol show config"),
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (subprocess.SubprocessError, OSError):
        # scontrol missing, not executable, or timed out: use the default.
        return default_max_array_size - 1

    out = (res.stdout or "") + (res.stderr or "")
    match = re.search(r"MaxArraySize\s*=?\s*(\d+)", out, re.IGNORECASE)
    if match is None:
        return default_max_array_size - 1

    try:
        max_array_size = int(match.group(1))
    except ValueError:
        # Defensive: the digit run could not be converted to an int.
        max_array_size = default_max_array_size

    # SLURM limits job arrays to MaxArraySize - 1.
    return max_array_size - 1


def get_job_wildcards(job: JobExecutorInterface) -> str:
    """
    Build a single string from the wildcards of a job.

    The items obtained by iterating over ``job.wildcards`` are joined with
    underscores, and every slash in the result is replaced by an
    underscore as well.

    Args:
        job: The JobExecutorInterface instance representing the job
    Returns:
        The joined wildcard string, or an empty string if the job has no
        (or empty) wildcards or accessing them raises an AttributeError.
    """
    try:
        wildcards = job.wildcards
        if not wildcards:
            return ""
        joined = "_".join(wildcards)
    except AttributeError:
        # The job does not expose wildcards: fall back to an empty string.
        return ""

    return joined.replace("/", "_")


def pending_jobs_for_rule(dag: DAGExecutorInterface, rule_name: str) -> int:
    """
    Count the jobs of one rule among the jobs returned by the DAG's
    ``needrun_jobs()``.

    Args:
        dag: The DAG whose ``needrun_jobs()`` are inspected.
        rule_name: Name of the rule whose jobs are counted.
    Returns:
        The number of those jobs whose ``rule.name`` equals ``rule_name``
        (0 if there is none).
    """
    matching = 0
    for candidate in dag.needrun_jobs():
        if candidate.rule.name == rule_name:
            matching += 1
    return matching


def round_half_up(n):
    """
    Round ``n`` to the nearest integer, with ties going toward +infinity.

    Implemented as ``floor(n + 0.5)``, so 2.5 becomes 3 and -2.5 becomes -2.
    """
    shifted = n + 0.5
    floored = math.floor(shifted)
    return int(floored)

Expand Down
Loading
Loading