Changes from all commits (44 commits)
- e9b7912  Create icr_alma institutional config (rachelicr, Jul 4, 2024)
- d0e2ad4  Profile refinement based on reviewer feedback (msarkis-icr, Feb 20, 2025)
- 687d015  Update docs/icr_alma.md (rachelicr, Feb 21, 2025)
- 628318a  move max_* from process to params (msarkis-icr, Feb 21, 2025)
- f420f16  Merge pull request #845 from ICR-RSE-Group/icr_alma (rachelicr, Feb 21, 2025)
- 916b701  [automated] Update pipeline configs (nf-core-bot, Feb 25, 2025)
- e6fab87  Merge pull request #846 from nf-core/create-pull-request/patch (jfy133, Feb 25, 2025)
- c043b33  Update seadragon.config (jiawku, Feb 25, 2025)
- bc9fd34  Update seadragon.config (jiawku, Feb 25, 2025)
- b51ed18  Update seadragon.config (jiawku, Feb 25, 2025)
- add653f  Merge pull request #847 from jiawku/master (jfy133, Feb 26, 2025)
- 3b2f8cf  Update icr_alma.config (rachelicr, Feb 26, 2025)
- 71c3dfa  Update Seattle Children's profile for new HPC (lvclark, Feb 26, 2025)
- 2089f5b  Additional locations to list profile (lvclark, Feb 26, 2025)
- c6d5e1d  Merge pull request #848 from ICR-RSE-Group/icr_alma (jfy133, Feb 26, 2025)
- c596674  Profile name (lvclark, Feb 26, 2025)
- 4dbea4c  Merge branch 'master' into lvclark-patch1 (maxulysse, Feb 26, 2025)
- cbb07e5  [automated] Fix code linting (nf-core-bot, Feb 26, 2025)
- 6e1eab7  Dummy value for params.assoc to pass testing (lvclark, Feb 26, 2025)
- 1bea41d  Undo commit for merge (lvclark, Feb 26, 2025)
- 71d0114  Merge branch 'lvclark-patch1' of https://github.com/lvclark/configs i… (lvclark, Feb 26, 2025)
- 34243b7  Dummy value for params.assoc to pass testing (lvclark, Feb 26, 2025)
- 3aa836b  More specific config URL (lvclark, Feb 26, 2025)
- 3e359c4  Assoc as environmental variable, and no workDir (lvclark, Feb 26, 2025)
- 1cab186  Merge pull request #849 from lvclark/lvclark-patch1 (jfy133, Feb 27, 2025)
- c553e68  Update seadragon.config (jiawku, Mar 3, 2025)
- 76b7b66  Merge pull request #850 from jiawku/patch-1 (jfy133, Mar 4, 2025)
- 26c4436  Update engaging.config by changing partition, updating resource limit… (bumproo, Mar 4, 2025)
- b8ce297  Update engaging.config to correct profile contact to git username (bumproo, Mar 4, 2025)
- 3a09763  Update ki_luria.config so profile contact is correct (bumproo, Mar 4, 2025)
- b4d1e74  Update engaging.config fixing line 24 space number (bumproo, Mar 4, 2025)
- f662d22  Merge pull request #851 from nf-core/bumproo-patch-1 (jfy133, Mar 4, 2025)
- f08341b  Update roslin.config - Remove -l rl9=false option (sguizard, Mar 4, 2025)
- eafc679  Fixing typos (sguizard, Mar 4, 2025)
- 2919ef4  Merge pull request #853 from sguizard/master (sguizard, Mar 5, 2025)
- 490bf14  Updated cluster profile (alexnater, Mar 5, 2025)
- f36f295  Updated documentation of unibe_ibu profile (alexnater, Mar 5, 2025)
- ada21e6  Removed beforeScript and added information about project to docs (alexnater, Mar 5, 2025)
- a2c974a  Update mjolnir_globe.config (bentpetersendk, Mar 5, 2025)
- e3a72c4  Merge pull request #857 from bentpetersendk/master (jfy133, Mar 5, 2025)
- 49336c8  Merge pull request #855 from alexnater/unibe_ibu (alexnater, Mar 5, 2025)
- 13def2e  Update eva.config (TCLamnidis, Mar 6, 2025)
- f75197d  Merge pull request #858 from nf-core/update_eager_gatk_hc_eva (TCLamnidis, Mar 6, 2025)
- 62b5da4  modify for test with large run (MahShaaban, Mar 9, 2025)
2 changes: 2 additions & 0 deletions .github/workflows/main.yml
@@ -90,6 +90,7 @@ jobs:
- "hasta"
- "hki"
- "hypatia"
- "icr_alma"
- "icr_davros"
- "ifb_core"
- "imb"
@@ -138,6 +139,7 @@ jobs:
- "sanger"
- "scw"
- "seadragon"
- "seattlechildrens"
- "seawulf"
- "seg_globe"
- "self_hosted_runner"
1 change: 1 addition & 0 deletions README.md
@@ -143,6 +143,7 @@ Currently documentation is available for the following systems:
- [HKI](docs/hki.md)
- [HYPATIA](docs/hypatia.md)
- [ICR_DAVROS](docs/icr_davros.md)
- [ICR_ALMA](docs/icr_alma.md)
- [IFB](docs/ifb_core.md)
- [ILIFU](docs/ilifu.md)
- [IMPERIAL](docs/imperial.md)
16 changes: 10 additions & 6 deletions conf/engaging.config
@@ -1,7 +1,7 @@
// Nextflow config for running on the MIT Engaging HPC cluster
params {
config_profile_description = 'MIT Engaging HPC cluster profile.'
config_profile_contact = 'Phil Palmer (@PhilPalmer)'
config_profile_contact = 'Charlie Whittaker (@bumproo)'
config_profile_url = "https://engaging-web.mit.edu/eofe-wiki/"
}

@@ -12,16 +12,20 @@ singularity {

process {
resourceLimits = [
memory: 64.GB,
memory: 128.GB,
cpus: 16,
time: 12.h
time: 10.h
]
executor = 'slurm'
clusterOptions = '-p sched_mit_hill'
clusterOptions = '-p mit_normal'
}

executor {
queueSize = 8
}

params {
max_memory = 64.GB
max_memory = 128.GB
max_cpus = 16
max_time = 12.h
max_time = 10.h
}
59 changes: 59 additions & 0 deletions conf/icr_alma.config
@@ -0,0 +1,59 @@

/*
* -------------------------------------------------
* Nextflow nf-core config file for ICR alma HPC
* -------------------------------------------------
* Defines slurm process executor and singularity
* settings.
*
*/
params {

config_profile_description = "Nextflow nf-core profile for ICR alma HPC"
config_profile_contact = "Rachel Alcraft (@rachelicr), Mira Sarkis (@msarkis-icr)"
// max_memory = 256.GB
// max_cpus = 30
// max_time = 5.d
@msarkis-icr (Mar 10, 2025):
These values specify resource limits for tasks running on the compute node. This is useful for setting upper bounds on resource usage, especially when using dynamic resource allocation. Any process asking for excessive resources will fail!
This also allows for dynamic resource allocation within these limits.

Author:
My understanding is that the max_* params apply globally to the whole workflow. I could be wrong. Do we want to limit a run to these resources?

}

process {
queue="compute"
executor = "slurm"
maxRetries = 3
maxErrors = '-1'

errorStrategy = { task.exitStatus in [137,255] ? 'retry' : 'terminate' }

Comment:
Before merging to nf-core, we had
errorStrategy = { task.exitStatus in [143,137,104,134,139,140,247,255] ? 'retry' : 'finish' }
The reviewer told us that we don't need to set this, as usually each pipeline has its own errorStrategy.

To be honest, I have run into exit code 255, and retrying was a waste of resources. The only solution was to increase the memory/CPUs or time!

Author:
I see. The issue here is that the current config ties the allocated memory to the number of CPUs, which is not what some processes expect. This resulted in an exit code 255 in my case, and I could increase the memory/CPUs by using a retry error strategy and task.attempt in a custom config.

Author:
In addition, retry does request more resources if the process resource allocation is done with # * task.attempt, which is the case in sarek.
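
For illustration, a minimal sketch of such a custom config is shown below. The process selector and base values are hypothetical; the point is simply that requests grow with task.attempt and that exit code 255 triggers a retry.

// custom.config: hedged sketch only, not part of this PR
process {
    withName: 'EXAMPLE_PROCESS' {                    // hypothetical process name
        cpus          = { 4     * task.attempt }     // request more CPUs on each retry
        memory        = { 32.GB * task.attempt }     // request more memory on each retry
        time          = { 12.h  * task.attempt }     // and more wall time
        errorStrategy = { task.exitStatus in [137, 255] ? 'retry' : 'finish' }
        maxRetries    = 2
    }
}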

withName: ".*" { time = 5.d }
@msarkis-icr (Mar 10, 2025):
Setting a fixed time limit of 5 days for all processes could lead to inefficient resource usage. Some tasks might finish much quicker.
Additionally, processes that require more resources (time, memory, or CPUs) are given lower priority in the queue. This means that if the cluster is busy, you risk waiting too long before getting a chance to run.

Author:
Agreed. There may be a better way to increase the time limit without hard-coding it to 5.d, for example by scaling it with task.attempt.
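
A minimal sketch of that idea, with an assumed 24-hour base value instead of a fixed 5.d for every process:

// hedged sketch: scale wall time with the retry attempt rather than hard-coding 5.d
process {
    time          = { 24.h * task.attempt }                            // 24 h, then 48 h, then 72 h
    errorStrategy = { task.exitStatus in [140] ? 'retry' : 'finish' }  // treating 140 as a walltime-style failure is an assumption
    maxRetries    = 2
}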


clusterOptions = '--mem-per-cpu=8000'

resourceLimits = [
memory: 256.GB,
cpus: 30,
time: 5.d
]

}
// Perform work directory cleanup after a successful run?
cleanup = false

executor {
// This is set because of an issue with too many
// singularity containers launching at once, they
// cause a singularity error with exit code 255.
// submitRateLimit = "2 sec"
@msarkis-icr (Mar 10, 2025):
As the above comment states, Alma gets overwhelmed when many processes are fired simultaneously. Submitting one job every 2 seconds seems acceptable.

By unsetting it, we allow an "unlimited" number of jobs to be launched simultaneously, which is not good!

I'm curious why you would want to remove this, and how beneficial it could be for long runs?

Author:
I see the need to limit simultaneous launches. I just think "2 sec" penalizes smaller, quick processes. queueSize may be a suitable alternative.

queueSize = 50

Comment:
OK for queueSize, but again I don't see how it could be beneficial for long runs?

Author:
See above.
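
For reference, a sketch of the two throttling knobs being weighed here; the values are illustrative, not a recommendation.

// hedged sketch: throttle the number of concurrent jobs, the submission rate, or both
executor {
    queueSize       = 50           // cap on jobs queued or running at any one time
    // submitRateLimit = '1/2sec'  // alternatively, roughly one submission every 2 seconds
}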

perCpuMemAllocation = true
}

singularity {
enabled = true
// runOptions = "--bind /mnt:/mnt --bind /data:/data"
autoMounts = true
// pullTimeout = 2.h
// cacheDir = '/data/scratch/shared/SINGULARITY-DOWNLOAD/nextflow/.singularity'

Comment:
I'm confused why you would avoid using the cacheDir.
To my understanding, runOptions, pullTimeout and cacheDir will only be taken into account if pulling an image. In the case where there are no images to pull, nothing will happen.

Again, this is a generic config that is meant to work for most cases.

Author:
True. The existence of these run options gave the impression that I could just execute the run without pre-downloading the containers, which still fails. I needed to pre-download the containers to a separate cache and override the cache dir in a custom config.
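
A minimal sketch of that workaround, assuming a hypothetical cache path that has been populated beforehand (e.g. with nf-core download or singularity pull):

// custom.config: hedged sketch of overriding the Singularity cache
singularity {
    enabled    = true
    autoMounts = true
    cacheDir   = '/data/scratch/USERNAME/singularity-cache'   // hypothetical, pre-populated path
}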

}




2 changes: 1 addition & 1 deletion conf/ki_luria.config
@@ -1,7 +1,7 @@
// Nextflow config for running on the Koch Institute of MIT Luria HPC cluster
params {
config_profile_description = 'KI at MIT Luria HPC cluster profile.'
config_profile_contact = 'Charlie Whittaker (@Charlie14557807)'
config_profile_contact = 'Charlie Whittaker (@bumproo)'
config_profile_url = "https://igb.mit.edu/computing-resources/luria-cluster"
}

7 changes: 3 additions & 4 deletions conf/mjolnir_globe.config
@@ -4,20 +4,19 @@ params {
config_profile_contact = 'Bent Petersen (@bentpetersendk)'
config_profile_url = 'https://globe.ku.dk/research/'
max_memory = 750.GB
max_cpus = 50
max_cpus = 48
max_time = 336.h
}

singularity {
enabled = true
autoMounts = true
cacheDir = '/maps/projects/mjolnir1/data/cache/nf-core/singularity'
}

process {
resourceLimits = [
memory: 750.GB,
cpus: 50,
cpus: 48,
time: 336.h
]
executor = 'slurm'
@@ -26,5 +25,5 @@ process {
cleanup = true

executor {
queueSize = 10
queueSize = 20
}
5 changes: 5 additions & 0 deletions conf/pipeline/eager/eva.config
@@ -198,6 +198,11 @@ process {
errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' }
}

withName: genotyping_hc {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" }
errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' }
}

withName: get_software_versions {
cache = false
clusterOptions = { "-S /bin/bash -V -l h=!(bionode06)" }
13 changes: 8 additions & 5 deletions conf/roslin.config
@@ -3,16 +3,18 @@ params {
config_profile_description = 'University of Edinburgh (Eddie) cluster profile for Roslin Institute provided by nf-core/configs.'
config_profile_contact = 'Sebastien Guizard (@sguizard) and Donald Dunbar (@ddunbar)'
config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'

}

executor {
name = "sge"

}

process {
stageInMode = 'symlink'
scratch = 'false'
penv = { task.cpus > 1 ? "sharedmem" : null }
penv = { task.cpus > 1 ? "sharedmem" : null }

// To date (16/08/2024), the FastQC module is still broken.
// More details here: https://github.com/nf-core/modules/pull/6156
@@ -24,9 +26,9 @@ process {
// Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option
// Otherwise set the project to uoe_baseline
if (System.getenv('NFX_SGE_PROJECT')) {
clusterOptions = {"-l rl9=false -l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P $NFX_SGE_PROJECT"}
clusterOptions = {"-l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P $NFX_SGE_PROJECT"}
} else {
clusterOptions = {"-l rl9=false -l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P uoe_baseline"}
clusterOptions = {"-l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P uoe_baseline"}
}
}

@@ -41,9 +43,9 @@ process {
// Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option
// Otherwise set the project to uoe_baseline
if (System.getenv('NFX_SGE_PROJECT')) {
clusterOptions = {"-l rl9=false -l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P $NFX_SGE_PROJECT"}
clusterOptions = {"-l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P $NFX_SGE_PROJECT"}
} else {
clusterOptions = {"-l rl9=false -l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P uoe_baseline"}
clusterOptions = {"-l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P uoe_baseline"}
}
}

@@ -76,3 +78,4 @@ singularity {
autoMounts = true
cacheDir = '/exports/cmvm/eddie/eb/groups/alaw3_eb_singularity_cache'
}

42 changes: 11 additions & 31 deletions conf/seadragon.config
@@ -24,33 +24,16 @@ singularity {
def membership = "groups".execute().text

def select_queue = { memory, cpu, walltime ->
// Cdragon queues
if (memory <= 168.GB && cpu <= 28) {
if (memory <= 950.GB && cpu <= 80) {
if (walltime <= 3.h) return 'short'
if (walltime <= 24.h) return 'medium'
if (walltime <= 240.h) return 'long'
}

// Edragon E40 queues
if (memory <= 475.GB && cpu <= 40) {
if (walltime <= 3.h) return 'e40short'
if (walltime <= 24.h) return 'e40medium'
if (walltime <= 240.h) return 'e40long'
}

// Edragon E80 queues
if (memory <= 950.GB && cpu <= 80) {
if (walltime <= 3.h) return 'e80short'
if (walltime <= 24.h) return 'e80medium'
if (walltime <= 240.h) return 'e80long'
if (walltime <= 504.h) return 'vlong'
}

// High memory queues
if (memory <= 1900.GB && cpu <= 35) {
if (walltime <= 240.h) return 'highmem'
}
if (memory <= 2900.GB && cpu <= 24) {
if (walltime <= 240.h) return 'vhighmem'
if (memory <= 3900.GB && cpu <= 80 && walltime <= 504.h) {
if (walltime <= 240.h) return 'evhighmem'
}

throw new IllegalArgumentException("No matching queue for memory=${memory}, cpu=${cpu}, time=${time}")
@@ -72,24 +55,21 @@ executor {

process {
resourceLimits = [
memory: 2900.GB, // Max memory based on vhighmem node
memory: 3900.GB, // Max memory based on vhighmem node
cpus: 80, // Max CPUs based on E80 node
time: 240.h // Max time for long queues
time: 504.h // Max time for long queues
]

executor = 'lsf' // Use LSF executor

memory = { task.memory ?: params.default_memory }
cpus = { task.cpus ?: params.default_cpus }
time = { task.time ?: params.default_time }

cpus = { 2 * task.attempt }
memory = { 12.GB * task.attempt }
time = { 3.h * task.attempt }

maxRetries = 3
afterScript = 'sleep 10' // Prevent abrupt re-submissions after retries

queue = { select_queue(task.memory, task.cpus, task.time) } // Use the updated select_queue function


withLabel:process_gpu {
cpus = { 40 } // Use Gdragon nodes
memory = { 168.GB } // Max memory for GPU nodes
Expand All @@ -98,9 +78,9 @@ process {
}

params {
max_memory = 2900.GB // Maximum memory based on vhighmem nodes
max_memory = 3900.GB // Maximum memory based on evhighmem nodes
max_cpus = 80 // Maximum CPUs based on E80 nodes
max_time = 240.h // Maximum runtime for long queues
max_time = 504.h // Maximum runtime for evlong queues
igenomes_base = '/rsrch3/scratch/reflib/REFLIB_data/AWS-iGenomes'
}

43 changes: 23 additions & 20 deletions conf/seattlechildrens.config
@@ -1,30 +1,33 @@
//Create profiles to easily switch between the different process executors and platforms.
def assoc = System.getenv("ASSOC") // Association belonging to a lab or project

//global parameters
params {
config_profile_description = 'The SCRI (seattle childrens research institute) cluster profile'
config_profile_contact = 'Research Scientific Computing (@RSC-RP)'
config_profile_url = 'https://github.com/RSC-RP'
config_profile_url = 'https://github.com/RSC-RP/nextflow_scri_config'
}

//workDir = "/data/hps/assoc/private/${assoc}/user/$USER/temp"

// SCRI HPC project params
queue = "paidq"
// freeq
project = "${params.project}"
process {
executor = 'slurm'
queue = 'cpu-core-sponsored'
memory = 7500.MB
time = '72h'
clusterOptions = "--account cpu-${assoc}-sponsored"
}

docker {
enabled = false
}

singularity {
enabled = true
autoMounts = true
cacheDir = "/data/hps/assoc/private/${assoc}/container"
runOptions = '--containall --no-home'
}

profiles {
//For running on an interactive session on cybertron with singularity module loaded
local_singularity {
process.executor = 'local'
singularity.enabled = true
}
//For executing the jobs on the HPC cluster with singularity containers
PBS_singularity {
process.executor = 'pbspro'
process.queue = "${params.queue}"
process.clusterOptions = "-P ${params.project}"
process.beforeScript = 'module load singularity'
singularity.enabled = true
}
executor {
queueSize = 2000
}
24 changes: 15 additions & 9 deletions conf/unibe_ibu.config
@@ -1,25 +1,31 @@
params {
config_profile_description = "University of Bern, Interfaculty Bioinformatics Unit cluster profile"
config_profile_contact = "irene.keller@dbmr.unibe.ch; [email protected]"
config_profile_contact = "alexander.nater@unibe.ch; [email protected]"
config_profile_url = "https://www.bioinformatics.unibe.ch/"
max_memory = 500.GB
max_cpus = 128
max_time = 240.h
schema_ignore_params = "project,clusterOptions"
project = null
clusterOptions = null
}

validation {
ignoreParams = ["schema_ignore_params", "project", "clusterOptions"]
}

process {
resourceLimits = [
memory: 500.GB,
cpus: 128,
time: 240.h
time: 672.h
]
executor = "slurm"
maxRetries = 2
beforeScript = 'mkdir -p ./tmp/ && export TMPDIR=./tmp/'
executor = 'slurm'
queue = 'pibu_el8'
maxRetries = 2
scratch = '$SCRATCH'
clusterOptions = (params.project ? "-A ${params.project} " : '') + "${params.clusterOptions ?: ''}"
}

executor {
queueSize = 30
queueSize = 50
}

singularity {