Skip to content

Commit 160fb46

Browse files
Merge branch 'release/0.21.0'
2 parents 414228f + 182ae2d commit 160fb46

32 files changed

+1169
-267
lines changed

DESCRIPTION

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
Package: future.batchtools
2-
Version: 0.20.0
2+
Version: 0.21.0
33
Depends:
44
R (>= 3.2.0),
55
parallelly,
66
future (>= 1.58.0)
77
Imports:
88
batchtools (>= 0.9.17),
9-
utils
9+
utils,
10+
checkmate,
11+
stringi
1012
Suggests:
1113
globals,
1214
future.apply,
@@ -32,5 +34,5 @@ URL: https://future.batchtools.futureverse.org, https://github.com/futureverse/f
3234
BugReports: https://github.com/futureverse/future.batchtools/issues
3335
Language: en-US
3436
Encoding: UTF-8
35-
RoxygenNote: 7.3.2
37+
RoxygenNote: 7.3.3
3638
Roxygen: list(markdown = TRUE)

NAMESPACE

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ export(batchtools_torque)
5959
export(loggedError)
6060
export(loggedOutput)
6161
export(makeClusterFunctionsBash)
62+
export(makeClusterFunctionsSlurm2)
6263
importFrom(batchtools,Worker)
64+
importFrom(batchtools,assertRegistry)
6365
importFrom(batchtools,batchExport)
6466
importFrom(batchtools,batchMap)
6567
importFrom(batchtools,cfBrewTemplate)
@@ -88,6 +90,9 @@ importFrom(batchtools,saveRegistry)
8890
importFrom(batchtools,setJobNames)
8991
importFrom(batchtools,submitJobs)
9092
importFrom(batchtools,waitForJobs)
93+
importFrom(checkmate,"%??%")
94+
importFrom(checkmate,assertCharacter)
95+
importFrom(checkmate,assertString)
9196
importFrom(future,FutureBackend)
9297
importFrom(future,FutureError)
9398
importFrom(future,FutureInterruptError)
@@ -107,6 +112,8 @@ importFrom(future,tweak)
107112
importFrom(parallelly,availableCores)
108113
importFrom(parallelly,availableWorkers)
109114
importFrom(parallelly,supportsMulticore)
115+
importFrom(stringi,stri_flatten)
116+
importFrom(stringi,stri_replace_all_fixed)
110117
importFrom(tools,pskill)
111118
importFrom(utils,capture.output)
112119
importFrom(utils,file_test)

NEWS.md

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,53 @@
1+
# Version 0.21.0 [2025-09-20]
2+
3+
## Significant Changes
4+
5+
* `batchtools_slurm()` now uses `makeClusterFunctionsSlurm2()`.
6+
7+
## New Features
8+
9+
* Add support for controlling the 'Rscript' call in the built-in job
10+
script templates. This can be done via fields `rscript` and
11+
`rscript_args` of the `resources` argument.
12+
13+
* Add support for setting environment variables in the built-in job
14+
script templates. This can be done via field `envs` of the
15+
`resources` argument.
16+
17+
* The built-in job script templates assert that the `Rscript`
18+
launcher is found, and if not, they give an informative error
19+
message suggesting to declare environment modules, via the
20+
`resources` argument, that should be loaded by the job script.
21+
22+
* Add `makeClusterFunctionsSlurm2()`, which patches
23+
`batchtools::makeClusterFunctionsSlurm()`. Firstly, it patches the
24+
`listJobsQueued()` cluster function such that it falls back to
25+
querying Slurm's accounting database (`sacct`), if the future was
26+
_not_ found in the Slurm job queue (`squeue`), which might be the
27+
case when Slurm provisions a job that was just submitted to the
28+
scheduler. Secondly, it patches the `submitJob()` cluster function
29+
such that the system call to `sbatch` captures stderr separately
30+
from stdout, which prevents auxiliary INFO messages from `sbatch`
31+
from corrupting the output to be parsed.
32+
33+
## Documentation
34+
35+
* Add example on how to configure `batchtools_slurm()` to run R
36+
within a Linux container.
37+
38+
## Bug Fixes
39+
40+
* `batchtools_slurm()` would produce "Future of class
41+
BatchtoolsSlurmFuture expired, which indicates that it crashed or
42+
was killed" errors on some Slurm clusters. We believe this happened
43+
because a recently submitted future job would not immediately show
44+
up on the job queue, which caused **future.batchtools** to
45+
incorrectly conclude that the job had already finished, but without
46+
producing any results. `batchtools_slurm()` now uses the new
47+
`makeClusterFunctionsSlurm2()`, which does a better job inferring
48+
whether a job is queued or not.
49+
50+
151
# Version 0.20.0 [2025-08-25]
252

353
## Significant Changes

R/BatchtoolsFutureBackend-class.R

Lines changed: 98 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,27 @@
3030
#' job-script template as variable `resources`. This is based on how
3131
#' [batchtools::submitJobs()] works, with the exception of specially
3232
#' reserved names defined by the \pkg{future.batchtools} package;
33-
#' * `resources[["asis"]]` is a character vector that are passed as-is to
34-
#' the job script and are injected as job resource declarations.
35-
#' * `resources[["modules"]]` is character vector of Linux environment
36-
#' modules to be loaded.
37-
#' * `resources[["startup"]]` and `resources[["shutdown"]]` are character
38-
#' vectors of shell code to be injected to the job script as-is.
33+
#'
3934
#' * `resources[["details"]]`, if TRUE, results in the job script outputting
4035
#' job details and job summaries at the beginning and at the end.
36+
#'
37+
#' * `resources[["startup"]]` and `resources[["shutdown"]]` are character
38+
#' vectors of shell code to be injected to the job script as-is.
39+
#'
40+
#' * `resources[["modules"]]` is a character vector of Linux environment
41+
#' modules to be loaded.
42+
#'
43+
#' * `resources[["envs"]]` is an optional named character vector specifying
44+
#' environment variables to be set.
45+
#'
46+
#' * `resources[["rscript"]]` is an optional character vector specifying
47+
#' how the 'Rscript' is launched. The `resources[["rscript_args"]]` field
48+
#' is an optional character vector specifying the 'Rscript' command-line
49+
#' arguments.
50+
#'
51+
#' * `resources[["asis"]]` is a character vector of entries that are passed as-is to
52+
#' the job script and are injected as job resource declarations.
53+
#'
4154
#' * All remaining `resources` named elements are injected as named resource
4255
#' specification for the scheduler.
4356
#'
@@ -325,7 +338,8 @@ launchFuture.BatchtoolsFutureBackend <- local({
325338
resources <- backend[["resources"]]
326339
config[["resources"]] <- resources
327340
future[["config"]] <- config
328-
341+
submitted_on <- Sys.time()
342+
329343
## WORKAROUND: batchtools::submitJobs() updates the RNG state,
330344
## which we must make sure to undo.
331345
tryCatch({
@@ -343,9 +357,10 @@ launchFuture.BatchtoolsFutureBackend <- local({
343357
msg <- sprintf("%s\nDETAILS:\nThe batchtools registry path: %s", msg, sQuote(path))
344358
stop(FutureLaunchError(msg, future = future))
345359
})
346-
360+
347361
if (debug) mdebugf("Launched future #%d", jobid$job.id)
348362

363+
future[["submitted_on"]] <- submitted_on
349364
future[["state"]] <- "running"
350365

351366
## 6. Reserve worker for future
@@ -535,6 +550,26 @@ status <- function(future, ...) {
535550

536551
jobid <- config$jobid
537552
if (is.na(jobid)) return("not submitted")
553+
554+
## Optionally filter by the scheduler's job ID, if it exists
555+
batch_id <- reg[["status"]][["batch.id"]]
556+
if (!is.null(batch_id) && inherits(future, "BatchtoolsTemplateFutureBackend")) {
557+
if (!is.character(batch_id) || length(batch_id) != 1L || is.na(batch_id) || !nzchar(batch_id) || !grepl("^[[:digit:].]+$", batch_id)) {
558+
stop(sprintf("Unknown value of 'batch.id': [class=%s] %s", class(batch_id)[1], paste(sQuote(batch_id), collapse = ", ")))
559+
}
560+
561+
## Pass this to cluster functions listJobsQueued() and listJobsRunning()
562+
## via an R option, because we cannot pass as an argument.
563+
options(
564+
future.batchtools.batch_id = batch_id,
565+
future.batchtools.submitted_on = future[["submitted_on"]]
566+
)
567+
on.exit(options(
568+
future.batchtools.batch_id = NULL,
569+
future.batchtools.submitted_on = NULL
570+
), add = TRUE)
571+
}
572+
538573
status <- get_status(reg = reg, ids = jobid)
539574
status <- (unlist(status) == 1L)
540575
status <- status[status]
@@ -667,7 +702,7 @@ resolved.BatchtoolsFuture <- function(x, ...) {
667702
## Assert that the process that created the future is
668703
## also the one that evaluates/resolves/queries it.
669704
assertOwner(x)
670-
705+
671706
## If not, checks the batchtools registry status
672707
resolved <- finished(x)
673708
if (is.na(resolved)) return(FALSE)
@@ -845,25 +880,69 @@ await <- function(future, cleanup = TRUE, ...) {
845880
## how we can distinguish the two right now, but I'll assume that
846881
## started jobs have a 'submitted' or 'started' status flag too,
847882
## whereas jobs that failed to launch won't. /HB 2025-07-15
883+
hints <- NULL
884+
885+
state <- future[["state"]]
886+
info <- sprintf("Future state: %s", sQuote(state))
887+
hints <- c(hints, info)
888+
info <- sprintf("Batchtools status: %s", commaq(stat))
889+
hints <- c(hints, info)
890+
891+
## SPECIAL CASE: Some Slurm users report on 'expired' jobs, although they never started.
892+
## Output more breadcrumbs to be able to narrow in on what causes this. /HB 2025-09-07
893+
if (inherits(future, "BatchtoolsSlurmFuture")) {
894+
batch_id <- reg[["status"]][["batch.id"]]
895+
if (length(batch_id) > 0) {
896+
info <- sprintf("Slurm job ID: [n=%d] %s", length(batch_id), commaq(batch_id))
897+
898+
args <- c("--noheader", "--format='job_id=%i,state=%T,submitted_on=%V,time_used=%M'", sprintf("--jobs=%s", paste(batch_id, collapse = ",")))
899+
res <- system2("squeue", args = args, stdout = TRUE, stderr = TRUE)
900+
if (length(res) == 0) {
901+
res <- "<empty>"
902+
} else {
903+
res <- paste(res, collapse = "; ") ## should only be one, but just in case ...
904+
}
905+
info <- c(info, sprintf("Slurm 'squeue' job status: %s", res))
906+
907+
args <- c("--noheader", "--parsable2", "--allocations", "--format='JobID,State,ExitCode'", sprintf("--jobs=%s", paste(batch_id, collapse = ",")))
908+
res <- system2("sacct", args = args, stdout = TRUE, stderr = TRUE)
909+
if (length(res) == 0) {
910+
res <- "<empty>"
911+
} else {
912+
res <- paste(res, collapse = "; ") ## should only be one, but just in case ...
913+
}
914+
info <- c(info, sprintf("Slurm 'sacct' job status: %s", res))
915+
} else {
916+
info <- "Slurm job ID: <not found>"
917+
info <- c(info, sprintf("Slurm job status: <unknown>"))
918+
}
919+
hints <- c(hints, info)
920+
}
848921

849-
hint <- tryCatch({
922+
## TROUBLESHOOTING: Logged output
923+
info <- tryCatch({
850924
output <- loggedOutput(future, timeout = 0.0)
851-
hint <- unlist(strsplit(output, split = "\n", fixed = TRUE))
852-
hint <- hint[nzchar(hint)]
853-
hint <- tail(hint, n = getOption("future.batchtools.expiration.tail", 48L))
925+
info <- unlist(strsplit(output, split = "\n", fixed = TRUE))
926+
info <- info[nzchar(info)]
927+
info <- tail(info, n = getOption("future.batchtools.expiration.tail", 48L))
854928
}, error = function(e) NULL)
855-
if (length(hint) > 0) {
856-
hint <- c("The last few lines of the logged output:", hint)
857-
hint <- paste(hint, collapse = "\n")
929+
930+
if (length(info) > 0) {
931+
info <- c("The last few lines of the logged output:", info)
858932
} else {
859-
hint <- "No logged output file exist (at the moment)"
933+
info <- "No logged output file exist (at the moment)"
860934
}
935+
hints <- c(hints, info)
861936

937+
if (length(hints) > 0) {
938+
hints <- c("\nPost-mortem details:", hints)
939+
hints <- paste(hints, collapse = "\n")
940+
}
862941
if (any(c("submitted", "started") %in% stat)) {
863-
msg <- sprintf("Future (%s) of class %s expired, which indicates that it crashed or was killed. %s", label, class(future)[1], hint)
942+
msg <- sprintf("Future (%s) of class %s expired, which indicates that it crashed or was killed.%s", label, class(future)[1], hints)
864943
result <- FutureInterruptError(msg, future = future)
865944
} else {
866-
msg <- sprintf("Future (%s) of class %s failed to launch. %s", label, class(future)[1], hint)
945+
msg <- sprintf("Future (%s) of class %s failed to launch.%s", label, class(future)[1], hints)
867946
result <- FutureLaunchError(msg, future = future)
868947
}
869948
} else if (future[["state"]] %in% c("canceled", "interrupted")) {

R/BatchtoolsTemplateFutureBackend-class.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ BatchtoolsTemplateFutureBackend <- function(type, scheduler.latency = 1.0, fs.la
7171
lsf = makeClusterFunctionsLSF,
7272
openlava = makeClusterFunctionsOpenLava,
7373
sge = makeClusterFunctionsSGE,
74-
slurm = makeClusterFunctionsSlurm,
74+
slurm = makeClusterFunctionsSlurm2,
7575
torque = makeClusterFunctionsTORQUE,
7676
makeClusterFunctions
7777
)

R/batchtools_bash.R

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,16 +112,28 @@ makeClusterFunctionsBash <- function(template = "bash", fs.latency = 0.0, ...) {
112112
stop_if_not(inherits(reg, "Registry"))
113113
stop_if_not(inherits(jc, "JobCollection"))
114114

115-
script <- cfBrewTemplate(reg, text = template_text, jc = jc)
116-
output <- system2(bin, args = c(script), stdout = TRUE, stderr = TRUE, wait = TRUE)
117115
debug <- isTRUE(getOption("future.debug"))
118116
if (debug) {
119-
mdebug_push("makeClusterFunctionsBash() ...")
120-
mdebug(paste(c(output, ""), collapse = "\n"))
117+
mdebug_push("makeClusterFunctionsBash() -> submitJob() ...")
121118
on.exit(mdebug_pop())
122119
}
120+
121+
script <- cfBrewTemplate(reg, text = template_text, jc = jc)
122+
if (debug) {
123+
mdebugf("job script: %s\n", script)
124+
bfr <- readLines(script, warn = FALSE)
125+
mdebugf("[job script]: %s\n", bfr)
126+
}
127+
128+
output <- system2(bin, args = c(script), stdout = TRUE, stderr = TRUE, wait = TRUE)
129+
if (debug) {
130+
mdebug(paste(c(sprintf("[job output]: %s", output), ""), collapse = "\n"))
131+
}
123132

124133
status <- attr(output, "status")
134+
if (debug) {
135+
mstr(list(status = status))
136+
}
125137
if (is.null(status)) {
126138
status <- 0L
127139
batch.id <- sprintf("bash#%d", Sys.getpid())

R/batchtools_lsf.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,11 @@ BatchtoolsLsfFutureBackend <- function(...) {
5050
#'
5151
#' f <- future({
5252
#' data.frame(
53-
#' hostname = Sys.info()[["nodename"]],
54-
#' os = Sys.info()[["sysname"]],
55-
#' cores = unname(parallelly::availableCores()),
56-
#' modules = Sys.getenv("LOADEDMODULES")
53+
#' hostname = Sys.info()[["nodename"]],
54+
#' os = Sys.info()[["sysname"]],
55+
#' osVersion = utils::osVersion,
56+
#' cores = unname(parallelly::availableCores()),
57+
#' modules = Sys.getenv("LOADEDMODULES")
5758
#' )
5859
#' })
5960
#' info <- value(f)

R/batchtools_openlava.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,11 @@ BatchtoolsOpenLavaFutureBackend <- function(...) {
5050
#'
5151
#' f <- future({
5252
#' data.frame(
53-
#' hostname = Sys.info()[["nodename"]],
54-
#' os = Sys.info()[["sysname"]],
55-
#' cores = unname(parallelly::availableCores()),
56-
#' modules = Sys.getenv("LOADEDMODULES")
53+
#' hostname = Sys.info()[["nodename"]],
54+
#' os = Sys.info()[["sysname"]],
55+
#' osVersion = utils::osVersion,
56+
#' cores = unname(parallelly::availableCores()),
57+
#' modules = Sys.getenv("LOADEDMODULES")
5758
#' )
5859
#' })
5960
#' info <- value(f)

R/batchtools_sge.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,11 @@ print.BatchtoolsSGEFutureBackend <- function(x, ...) {
6565
#'
6666
#' f <- future({
6767
#' data.frame(
68-
#' hostname = Sys.info()[["nodename"]],
69-
#' os = Sys.info()[["sysname"]],
70-
#' cores = unname(parallelly::availableCores()),
71-
#' modules = Sys.getenv("LOADEDMODULES")
68+
#' hostname = Sys.info()[["nodename"]],
69+
#' os = Sys.info()[["sysname"]],
70+
#' osVersion = utils::osVersion,
71+
#' cores = unname(parallelly::availableCores()),
72+
#' modules = Sys.getenv("LOADEDMODULES")
7273
#' )
7374
#' })
7475
#' info <- value(f)

0 commit comments

Comments
 (0)