@@ -416,6 +416,116 @@ tweak.mirai_cluster <- function(strategy, ..., penvir = parent.frame()) {
 #'
 #' @return Nothing.
 #'
+#' @section Launch mirai workers via HPC job scheduler:
+#'
+#' If you have access to a high-performance compute (HPC) environment
+#' with a job scheduler, you can use **future.mirai** and **mirai**
+#' to run parallel workers distributed across the compute cluster.
+#' How to set these workers up is explained in [mirai::cluster_config()],
+#' which should work for the most common job schedulers, e.g. Slurm,
+#' Sun/Son of/Oracle/Univa/Altair Grid Engine (SGE), OpenLava,
+#' Load Sharing Facility (LSF), and TORQUE/PBS.
+#'
+#' _Note: Not all compute clusters support running **mirai** workers
+#' this way. This is because **mirai** workers need to establish a
+#' TCP connection back to the machine that launched them, but some
+#' systems have security policies that disallow such connections from
+#' being established. This is often configured in the firewall and
+#' can only be changed by the system administrators. If your system
+#' has such rules, you will find that the mirai jobs are launched and
+#' running on the scheduler, but the main R session will wait forever
+#' for the mirai workers to connect back, i.e.
+#' `mirai::info()[["connections"]]` remains zero in the example below._
+#'
+#' Briefly, to launch a cluster of mirai workers on an HPC cluster, we
+#' need to:
+#'
+#' 1. configure [mirai::cluster_config()] for the job scheduler,
+#'
+#' 2. use this configuration to launch workers via [mirai::daemons()].
+#'
+#' The first step is specific to each job scheduler and this is where
+#' you control things such as how much memory each worker gets, for
+#' how long it may run, and which environment modules to load, if any.
+#' The second step is the same regardless of the job scheduler.
+#' Here is an example of how to run parallel mirai workers on a
+#' Slurm scheduler and then use them with the futureverse.
+#'
+#' ```r
+#' # Here we give each worker 200 MiB of RAM and a maximum of one hour
+#' # to run. Unless we specify '--cpus-per-task=N', each mirai worker is
+#' # allotted one CPU core, which impacts nested parallelization.
+#' # R is provided via environment module 'r' on this cluster.
+#' config <- mirai::cluster_config(command = "sbatch", options = "
+#' #SBATCH --job-name=mirai
+#' #SBATCH --time=01:00:00
+#' #SBATCH --mem=200M
+#' module load r
+#' ")
+#'
+#' # -------------------------------------------------------------------
+#' # Launch eight mirai workers, via equally many jobs, wait for all of
+#' # them to become available, and use them with the futureverse
+#' # -------------------------------------------------------------------
+#' workers <- 8
+#' mirai::daemons(n = workers, url = mirai::host_url(), remote = config)
+#' while (mirai::info()[["connections"]] < workers) Sys.sleep(1.0)
+#' plan(future.mirai::mirai_cluster)
+#'
+#' # Verify that futures are resolved on a compute node
+#' f <- future({
+#'   data.frame(
+#'     hostname = Sys.info()[["nodename"]],
+#'     os = Sys.info()[["sysname"]],
+#'     cores = unname(parallelly::availableCores()),
+#'     modules = Sys.getenv("LOADEDMODULES")
+#'   )
+#' })
+#' info <- value(f)
+#' print(info)
+#' #>   hostname    os cores modules
+#' #> 1      n12 Linux     1 r/4.5.1
+#'
+#' # Shut down parallel workers
+#' plan(sequential)
+#' mirai::daemons(0)
+#' ```
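+#'
+#' The `while()` loop above blocks until all workers have connected.
+#' As a small sketch (not part of the example above), you can add a
+#' timeout so the main R session gives up instead of waiting forever
+#' when, as noted earlier, the cluster's firewall blocks the workers
+#' from connecting back; the five-minute limit is an arbitrary choice:
+#'
+#' ```r
+#' # Give up if the workers have not connected back within five minutes
+#' deadline <- Sys.time() + 5 * 60
+#' while (mirai::info()[["connections"]] < workers) {
+#'   if (Sys.time() > deadline) {
+#'     mirai::daemons(0)  # stop listening; queued or running scheduler
+#'                        # jobs may still need manual cleanup
+#'     stop("mirai workers did not connect back in time; ",
+#'          "check the cluster's firewall/TCP policies")
+#'   }
+#'   Sys.sleep(1.0)
+#' }
+#' ```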
+#'
+#' If you are on SGE, you can use the following configuration:
+#'
+#' ```r
+#' # -----------------------------------------------------------
+#' # Configure mirai to launch R workers via the job scheduler
+#' # -----------------------------------------------------------
+#' # Here we give each worker 200 MiB of RAM and a maximum of
+#' # one hour to run. Unless we specify '-pe smp N', each
+#' # mirai worker is allotted one CPU core, which impacts
+#' # nested parallelization. To make sure R is available to
+#' # launch the mirai workers, we load environment module 'r'
+#' # on this cluster.
+#' config <- mirai::cluster_config(command = "qsub", options = "
+#' #$ -N mirai
+#' #$ -j y
+#' #$ -cwd
+#' #$ -l h_rt=01:00:00
+#' #$ -l mem_free=200M
+#' module load r
+#' ")
+#' ```
+#'
+#' Everything else is the same.
+#'
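+#' As a hedged sketch (not part of the configurations above), the
+#' one-CPU-per-worker default mentioned in the comments can be lifted
+#' by requesting more cores per job, e.g. Slurm's '--cpus-per-task'
+#' option or an SGE parallel environment such as '-pe smp N'. Each
+#' worker's `parallelly::availableCores()` should then reflect that
+#' allotment, which nested parallel code can respect:
+#'
+#' ```r
+#' # Request four CPU cores per mirai worker on Slurm; the verification
+#' # future above should then report 'cores = 4', because
+#' # parallelly::availableCores() detects the Slurm CPU allotment
+#' config <- mirai::cluster_config(command = "sbatch", options = "
+#' #SBATCH --job-name=mirai
+#' #SBATCH --time=01:00:00
+#' #SBATCH --mem=200M
+#' #SBATCH --cpus-per-task=4
+#' module load r
+#' ")
+#' ```
+#'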
+#' _Comment_: [mirai::cluster_config()] configures the jobs to be run in
+#' a vanilla POSIX shell, i.e. `/bin/sh`. This might be too strict for
+#' some setups. If your jobs need to be run using Bash (`/bin/bash`),
+#' you can tweak the `config` object manually to do so:
+#'
+#' ```r
+#' config$command <- "/bin/bash"
+#' config$args <- sub("/bin/sh", config$command, config$args)
+#' ```
+#'
 #' @example incl/mirai_cluster.R
 #'
 #' @importFrom future future