@@ -667,7 +667,7 @@ resolved.BatchtoolsFuture <- function(x, ...) {
667667 # # Assert that the process that created the future is
668668 # # also the one that evaluates/resolves/queries it.
669669 assertOwner(x )
670-
670+
671671 # # If not, checks the batchtools registry status
672672 resolved <- finished(x )
673673 if (is.na(resolved )) return (FALSE )
@@ -845,25 +845,59 @@ await <- function(future, cleanup = TRUE, ...) {
845845 # # how we can distinguish the two right now, but I'll assume that
846846 # # started jobs have a 'submitted' or 'started' status flag too,
847847 # # whereas jobs that failed to launch won't. /HB 2025-07-15
848+ hints <- NULL
849+
850+ state <- future [[" state" ]]
851+ info <- sprintf(" Future state: %s" , sQuote(state ))
852+ hints <- c(hints , info )
853+ info <- sprintf(" Batchtools status: %s" , commaq(stat ))
854+ hints <- c(hints , info )
855+
856+ # # SPECIAL CASE: Some Slurm users report on 'expired' jobs, although they never started.
857+ # # Output more breadcrumbs to be able to narrow in on what causes this. /HB 2025-09-07
858+ if (inherits(future , " BatchtoolsSlurmFuture" )) {
859+ # # Get _all_ jobs of the user, including those not submitted via future.batchtools
860+ slurm_job_ids <- unique(c(
861+ reg $ cluster.functions $ listJobsQueued(reg ),
862+ reg $ cluster.functions $ listJobsRunning(reg )
863+ ))
864+ if (length(slurm_job_ids ) > 0 ) {
865+ info <- sprintf(" Slurm job ID: [n=%d] %s" , length(slurm_job_ids ), commaq(slurm_job_ids ))
866+ args <- c(" --noheader" , " --format='job_id=%i,state=%T,submitted_on=%V,time_used=%M'" , " -j" , paste(slurm_job_ids , collapse = " ," ))
867+ res <- system2(" squeue" , args = args , stdout = TRUE , stderr = TRUE )
868+ res <- paste(res , collapse = " ; " ) # # should only be a single line, but ...
869+ info <- c(info , sprintf(" Slurm job status: %s" , res ))
870+ } else {
871+ info <- " Slurm job ID: <not found>"
872+ info <- c(info , sprintf(" Slurm job status: <unknown>" ))
873+ }
874+ hints <- c(hints , info )
875+ }
848876
849- hint <- tryCatch({
877+ # # TROUBLESHOOTING: Logged output
878+ info <- tryCatch({
850879 output <- loggedOutput(future , timeout = 0.0 )
851- hint <- unlist(strsplit(output , split = " \n " , fixed = TRUE ))
852- hint <- hint [nzchar(hint )]
853- hint <- tail(hint , n = getOption(" future.batchtools.expiration.tail" , 48L ))
880+ info <- unlist(strsplit(output , split = " \n " , fixed = TRUE ))
881+ info <- info [nzchar(info )]
882+ info <- tail(info , n = getOption(" future.batchtools.expiration.tail" , 48L ))
854883 }, error = function (e ) NULL )
855- if (length( hint ) > 0 ) {
856- hint <- c( " The last few lines of the logged output: " , hint )
857- hint <- paste( hint , collapse = " \n " )
884+
885+ if (length( info ) > 0 ) {
886+ info <- c( " The last few lines of the logged output: " , info )
858887 } else {
859- hint <- " No logged output file exist (at the moment)"
888+ info <- " No logged output file exist (at the moment)"
860889 }
890+ hints <- c(hints , info )
861891
892+ if (length(hints ) > 0 ) {
893+ hints <- c(" \n Post-mortem details:" , hints )
894+ hints <- paste(hints , collapse = " \n " )
895+ }
862896 if (any(c(" submitted" , " started" ) %in% stat )) {
863- msg <- sprintf(" Future (%s) of class %s expired, which indicates that it crashed or was killed. %s" , label , class(future )[1 ], hint )
897+ msg <- sprintf(" Future (%s) of class %s expired, which indicates that it crashed or was killed.%s" , label , class(future )[1 ], hints )
864898 result <- FutureInterruptError(msg , future = future )
865899 } else {
866- msg <- sprintf(" Future (%s) of class %s failed to launch. %s" , label , class(future )[1 ], hint )
900+ msg <- sprintf(" Future (%s) of class %s failed to launch.%s" , label , class(future )[1 ], hints )
867901 result <- FutureLaunchError(msg , future = future )
868902 }
869903 } else if (future [[" state" ]] %in% c(" canceled" , " interrupted" )) {
0 commit comments