3030# ' job-script template as variable `resources`. This is based on how
3131# ' [batchtools::submitJobs()] works, with the exception for specially
3232# ' reserved names defined by the \pkg{future.batchtools} package;
33- # ' * `resources[["asis"]]` is a character vector that are passed as-is to
34- # ' the job script and are injected as job resource declarations.
35- # ' * `resources[["modules"]]` is character vector of Linux environment
36- # ' modules to be loaded.
37- # ' * `resources[["startup"]]` and `resources[["shutdown"]]` are character
38- # ' vectors of shell code to be injected to the job script as-is.
33+ # '
3934# ' * `resources[["details"]]`, if TRUE, results in the job script outputting
4035# ' job details and job summaries at the beginning and at the end.
36+ # '
37+ # ' * `resources[["startup"]]` and `resources[["shutdown"]]` are character
38+ # ' vectors of shell code to be injected to the job script as-is.
39+ # '
40+ # ' * `resources[["modules"]]` is character vector of Linux environment
41+ # ' modules to be loaded.
42+ # '
43+ # ' * `resources[["envs"]]`, is an optional names character vector specifying
44+ # ' environment variables to be set.
45+ # '
46+ # ' * `resources[["rscript"]]` is an optional character vector specifying
47+ # ' how the 'Rscript' is launched. The `resources[["rscript_args"]]` field
48+ # ' is an optional character vector specifying the 'Rscript' command-line
49+ # ' arguments.
50+ # '
51+ # ' * `resources[["asis"]]` is a character vector that are passed as-is to
52+ # ' the job script and are injected as job resource declarations.
53+ # '
4154# ' * All remaining `resources` named elements are injected as named resource
4255# ' specification for the scheduler.
4356# '
@@ -325,7 +338,8 @@ launchFuture.BatchtoolsFutureBackend <- local({
325338 resources <- backend [[" resources" ]]
326339 config [[" resources" ]] <- resources
327340 future [[" config" ]] <- config
328-
341+ submitted_on <- Sys.time()
342+
329343 # # WORKAROUND: batchtools::submitJobs() updates the RNG state,
330344 # # which we must make sure to undo.
331345 tryCatch({
@@ -343,9 +357,10 @@ launchFuture.BatchtoolsFutureBackend <- local({
343357 msg <- sprintf(" %s\n DETAILS:\n The batchtools registry path: %s" , msg , sQuote(path ))
344358 stop(FutureLaunchError(msg , future = future ))
345359 })
346-
360+
347361 if (debug ) mdebugf(" Launched future #%d" , jobid $ job.id )
348362
363+ future [[" submitted_on" ]] <- submitted_on
349364 future [[" state" ]] <- " running"
350365
351366 # # 6. Reserve worker for future
@@ -535,6 +550,26 @@ status <- function(future, ...) {
535550
536551 jobid <- config $ jobid
537552 if (is.na(jobid )) return (" not submitted" )
553+
554+ # # Optionally filter by the scheduler's job ID, if it exists
555+ batch_id <- reg [[" status" ]][[" batch.id" ]]
556+ if (! is.null(batch_id ) && inherits(future , " BatchtoolsTemplateFutureBackend" )) {
557+ if (! is.character(batch_id ) || length(batch_id ) != 1L || is.na(batch_id ) || ! nzchar(batch_id ) || ! grepl(" ^[[:digit:].]+$" , batch_id )) {
558+ stop(sprintf(" Unknown value of 'batch.id': [class=%s] %s" , class(batch_id )[1 ], paste(sQuote(batch_id ), collapse = " , " )))
559+ }
560+
561+ # # Pass this to cluster functions listJobsQueued() and listJobsRunning()
562+ # # via an R option, because we cannot pass as an argument.
563+ options(
564+ future.batchtools.batch_id = batch_id ,
565+ future.batchtools.submitted_on = future [[" submitted_on" ]]
566+ )
567+ on.exit(options(
568+ future.batchtools.batch_id = NULL ,
569+ future.batchtools.submitted_on = NULL
570+ ), add = TRUE )
571+ }
572+
538573 status <- get_status(reg = reg , ids = jobid )
539574 status <- (unlist(status ) == 1L )
540575 status <- status [status ]
@@ -667,7 +702,7 @@ resolved.BatchtoolsFuture <- function(x, ...) {
667702 # # Assert that the process that created the future is
668703 # # also the one that evaluates/resolves/queries it.
669704 assertOwner(x )
670-
705+
671706 # # If not, checks the batchtools registry status
672707 resolved <- finished(x )
673708 if (is.na(resolved )) return (FALSE )
@@ -845,25 +880,69 @@ await <- function(future, cleanup = TRUE, ...) {
845880 # # how we can distinguish the two right now, but I'll assume that
846881 # # started jobs have a 'submitted' or 'started' status flag too,
847882 # # whereas jobs that failed to launch won't. /HB 2025-07-15
883+ hints <- NULL
884+
885+ state <- future [[" state" ]]
886+ info <- sprintf(" Future state: %s" , sQuote(state ))
887+ hints <- c(hints , info )
888+ info <- sprintf(" Batchtools status: %s" , commaq(stat ))
889+ hints <- c(hints , info )
890+
891+ # # SPECIAL CASE: Some Slurm users report on 'expired' jobs, although they never started.
892+ # # Output more breadcrumbs to be able to narrow in on what causes this. /HB 2025-09-07
893+ if (inherits(future , " BatchtoolsSlurmFuture" )) {
894+ batch_id <- reg [[" status" ]][[" batch.id" ]]
895+ if (length(batch_id ) > 0 ) {
896+ info <- sprintf(" Slurm job ID: [n=%d] %s" , length(batch_id ), commaq(batch_id ))
897+
898+ args <- c(" --noheader" , " --format='job_id=%i,state=%T,submitted_on=%V,time_used=%M'" , sprintf(" --jobs=%s" , paste(batch_id , collapse = " ," )))
899+ res <- system2(" squeue" , args = args , stdout = TRUE , stderr = TRUE )
900+ if (length(res ) == 0 ) {
901+ res <- " <empty>"
902+ } else {
903+ res <- paste(res , collapse = " ; " ) # # should only be one, but just in case ...
904+ }
905+ info <- c(info , sprintf(" Slurm 'squeue' job status: %s" , res ))
906+
907+ args <- c(" --noheader" , " --parsable2" , " --allocations" , " --format='JobID,State,ExitCode'" , sprintf(" --jobs=%s" , paste(batch_id , collapse = " ," )))
908+ res <- system2(" sacct" , args = args , stdout = TRUE , stderr = TRUE )
909+ if (length(res ) == 0 ) {
910+ res <- " <empty>"
911+ } else {
912+ res <- paste(res , collapse = " ; " ) # # should only be one, but just in case ...
913+ }
914+ info <- c(info , sprintf(" Slurm 'sacct' job status: %s" , res ))
915+ } else {
916+ info <- " Slurm job ID: <not found>"
917+ info <- c(info , sprintf(" Slurm job status: <unknown>" ))
918+ }
919+ hints <- c(hints , info )
920+ }
848921
849- hint <- tryCatch({
922+ # # TROUBLESHOOTING: Logged output
923+ info <- tryCatch({
850924 output <- loggedOutput(future , timeout = 0.0 )
851- hint <- unlist(strsplit(output , split = " \n " , fixed = TRUE ))
852- hint <- hint [nzchar(hint )]
853- hint <- tail(hint , n = getOption(" future.batchtools.expiration.tail" , 48L ))
925+ info <- unlist(strsplit(output , split = " \n " , fixed = TRUE ))
926+ info <- info [nzchar(info )]
927+ info <- tail(info , n = getOption(" future.batchtools.expiration.tail" , 48L ))
854928 }, error = function (e ) NULL )
855- if (length( hint ) > 0 ) {
856- hint <- c( " The last few lines of the logged output: " , hint )
857- hint <- paste( hint , collapse = " \n " )
929+
930+ if (length( info ) > 0 ) {
931+ info <- c( " The last few lines of the logged output: " , info )
858932 } else {
859- hint <- " No logged output file exist (at the moment)"
933+ info <- " No logged output file exist (at the moment)"
860934 }
935+ hints <- c(hints , info )
861936
937+ if (length(hints ) > 0 ) {
938+ hints <- c(" \n Post-mortem details:" , hints )
939+ hints <- paste(hints , collapse = " \n " )
940+ }
862941 if (any(c(" submitted" , " started" ) %in% stat )) {
863- msg <- sprintf(" Future (%s) of class %s expired, which indicates that it crashed or was killed. %s" , label , class(future )[1 ], hint )
942+ msg <- sprintf(" Future (%s) of class %s expired, which indicates that it crashed or was killed.%s" , label , class(future )[1 ], hints )
864943 result <- FutureInterruptError(msg , future = future )
865944 } else {
866- msg <- sprintf(" Future (%s) of class %s failed to launch. %s" , label , class(future )[1 ], hint )
945+ msg <- sprintf(" Future (%s) of class %s failed to launch.%s" , label , class(future )[1 ], hints )
867946 result <- FutureLaunchError(msg , future = future )
868947 }
869948 } else if (future [[" state" ]] %in% c(" canceled" , " interrupted" )) {
0 commit comments