Skip to content

Commit 6d7fcf8

Browse files
committed
Remove check_max and fix foldseek for k8s (again).
1 parent 6563003 commit 6d7fcf8

File tree

4 files changed

+47
-91
lines changed

4 files changed

+47
-91
lines changed

conf/base.config

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,51 +10,51 @@
1010

1111
process {
1212

13-
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
14-
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
15-
time = { check_max( 4.h * task.attempt, 'time' ) }
13+
cpus = 1
14+
memory = { 6.GB * task.attempt }
15+
time = { 4.h * task.attempt }
1616

1717
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
1818
maxRetries = 1
1919
maxErrors = '-1'
2020

2121
// Process-specific resource requirements
2222
withLabel:process_single {
23-
cpus = { check_max( 1 , 'cpus' ) }
24-
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
25-
time = { check_max( 4.h * task.attempt, 'time' ) }
23+
cpus = 1
24+
memory = { 6.GB * task.attempt }
25+
time = { 4.h * task.attempt }
2626
}
2727
withLabel:process_low {
28-
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
29-
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
30-
time = { check_max( 4.h * task.attempt, 'time' ) }
28+
cpus = { 2 * task.attempt }
29+
memory = { 12.GB * task.attempt }
30+
time = { 4.h * task.attempt }
3131
}
3232
withLabel:process_medium {
33-
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
34-
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
35-
time = { check_max( 8.h * task.attempt, 'time' ) }
33+
cpus = { 6 * task.attempt }
34+
memory = { 36.GB * task.attempt }
35+
time = { 8.h * task.attempt }
3636
}
3737
withLabel:process_high {
38-
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
39-
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
40-
time = { check_max( 16.h * task.attempt, 'time' ) }
38+
cpus = { 12 * task.attempt }
39+
memory = { 72.GB * task.attempt }
40+
time = { 16.h * task.attempt }
4141
}
4242
withLabel:process_long {
43-
time = { check_max( 20.h * task.attempt, 'time' ) }
43+
time = { 20.h * task.attempt }
4444
}
4545
withLabel:process_high_memory {
46-
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
46+
memory = { 200.GB * task.attempt }
4747
}
4848

4949
// GPU-specific labels for Boltzgen
5050
withLabel:process_high_gpu {
5151
// Boltzgen requires a GPU and substantial memory
52-
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
53-
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
54-
time = { check_max( 48.h * task.attempt, 'time' ) }
52+
cpus = { 8 * task.attempt }
53+
memory = { 64.GB * task.attempt }
54+
time = { 48.h * task.attempt }
5555

5656
// GPU configuration - request 1 GPU by default
57-
accelerator = { check_max( 1, 'gpus' ) }
57+
accelerator = 1
5858
clusterOptions = { task.ext.clusterOptions ?: '' }
5959

6060
// Container GPU access for Docker
@@ -63,38 +63,38 @@ process {
6363

6464
// GPU label for processes that can optionally use GPU
6565
withLabel:process_medium_gpu {
66-
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
67-
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
68-
time = { check_max( 8.h * task.attempt, 'time' ) }
66+
cpus = { 6 * task.attempt }
67+
memory = { 36.GB * task.attempt }
68+
time = { 8.h * task.attempt }
6969

7070
// GPU configuration - request 1 GPU
71-
accelerator = { check_max( 1, 'gpus' ) }
71+
accelerator = 1
7272

7373
// Container GPU access for Docker
7474
containerOptions = '--gpus all'
7575
}
7676

7777
withName:BOLTZGEN_RUN {
7878
// Extended time for large design runs
79-
time = { check_max( 72.h * task.attempt, 'time' ) }
79+
time = { 72.h * task.attempt }
8080

8181
// Increase memory for large num_designs
82-
memory = { 40.GB * task.attempt }
82+
memory = { 40.GB * task.attempt }
8383

8484
// Request 1 GPU - Boltzgen uses single GPU efficiently
85-
accelerator = { check_max( 1, 'gpus' ) }
85+
accelerator = 1
8686
containerOptions = '--gpus all'
8787
}
8888

8989
withName:PROTEINMPNN_OPTIMIZE {
9090
// ProteinMPNN can benefit significantly from GPU acceleration
9191
// The model is PyTorch-based and CUDA-compatible
92-
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
93-
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
94-
time = { check_max( 6.h * task.attempt, 'time' ) }
92+
cpus = { 4 * task.attempt }
93+
memory = { 16.GB * task.attempt }
94+
time = { 6.h * task.attempt }
9595

9696
// Request 1 GPU for ProteinMPNN inference
97-
accelerator = { check_max( 1, 'gpus' ) }
97+
accelerator = 1
9898

9999
// Container GPU access for Docker
100100
containerOptions = '--gpus all'
@@ -103,20 +103,20 @@ process {
103103
withName:FOLDSEEK_SEARCH {
104104
// Foldseek supports GPU acceleration for searches
105105
// GPU provides a 4-27x speedup for structure alignment
106-
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
107-
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
108-
time = { check_max( 4.h * task.attempt, 'time' ) }
106+
cpus = { 8 * task.attempt }
107+
memory = { 32.GB * task.attempt }
108+
time = { 4.h * task.attempt }
109109

110110
// Request 1 GPU for accelerated searches
111-
accelerator = { check_max( 1, 'gpus' ) }
111+
accelerator = 1
112112

113113
// Container GPU access for Docker, plus an entrypoint override so bash scripts can run
114114
containerOptions = '--gpus all --entrypoint ""'
115115
}
116116

117117
withName:BOLTZ2_REFOLD {
118-
accelerator = { check_max( 1, 'gpus' ) }
119-
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
118+
accelerator = 1
119+
memory = { 32.GB * task.attempt }
120120
containerOptions = '--gpus all -e TORCH_FLOAT32_MATMUL_PRECISION=medium'
121121
}
122122
}

modules/local/foldseek_search.nf

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,31 +12,25 @@ process FOLDSEEK_SEARCH {
1212

1313
input:
1414
tuple val(meta), path(structure)
15-
path database
15+
path database_dir
1616

1717
output:
1818
tuple val(meta), path("${meta.id}_foldseek_results.tsv"), emit: results
1919
tuple val(meta), path("${meta.id}_foldseek_summary.tsv"), emit: summary
2020
path "versions.yml", emit: versions
2121

2222
script:
23-
def db_path = database.name != 'NO_DATABASE' ? database : params.foldseek_database
2423
def evalue = params.foldseek_evalue ?: 0.001
2524
def max_seqs = params.foldseek_max_seqs ?: 100
2625
def sensitivity = params.foldseek_sensitivity ?: 9.5
2726
def coverage = params.foldseek_coverage ?: 0.0
2827
def alignment_type = params.foldseek_alignment_type ?: 2
2928
def threads = task.cpus
3029

31-
// Validate database
32-
if (!db_path) {
33-
error "ERROR: No Foldseek database specified. Please set --foldseek_database parameter."
34-
}
35-
3630
"""
3731
/usr/local/bin/foldseek_avx2 easy-search \\
3832
${structure} \\
39-
${db_path}/afdb \\
33+
${database_dir}/afdb \\
4034
${meta.id}_foldseek_results.tsv \\
4135
tmp_foldseek \\
4236
-e ${evalue} \\

nextflow.config

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -170,42 +170,3 @@ profiles {
170170
includeConfig 'conf/test_design_protein.config'
171171
}
172172
}
173-
174-
// Function to ensure that resource requirements don't go beyond maximum limit
175-
def check_max(obj, type) {
176-
if (type == 'memory') {
177-
try {
178-
if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
179-
return params.max_memory as nextflow.util.MemoryUnit
180-
else
181-
return obj
182-
} catch (all) {
183-
println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
184-
return obj
185-
}
186-
} else if (type == 'time') {
187-
try {
188-
if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
189-
return params.max_time as nextflow.util.Duration
190-
else
191-
return obj
192-
} catch (all) {
193-
println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
194-
return obj
195-
}
196-
} else if (type == 'cpus') {
197-
try {
198-
return Math.min( obj, params.max_cpus as int )
199-
} catch (all) {
200-
println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
201-
return obj
202-
}
203-
} else if (type == 'gpus') {
204-
try {
205-
return Math.min( obj, params.max_gpus as int )
206-
} catch (all) {
207-
println " ### ERROR ### Max gpus '${params.max_gpus}' is not valid! Using default value: $obj"
208-
return obj
209-
}
210-
}
211-
}

workflows/protein_design.nf

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,14 +292,15 @@ workflow PROTEIN_DESIGN {
292292
// Search for structural homologs of both Boltzgen and Protenix structures
293293
// in the AlphaFold database (or another specified database)
294294
if (params.run_foldseek) {
295-
// Prepare database channel
296-
if (params.foldseek_database) {
297-
ch_foldseek_database = Channel.fromPath(params.foldseek_database, checkIfExists: true).first()
298-
} else {
299-
log.warn "⚠️ Foldseek is enabled but no database specified. Please set --foldseek_database parameter."
300-
ch_foldseek_database = Channel.value(file('NO_DATABASE'))
295+
// Validate and prepare database channel
296+
if (!params.foldseek_database) {
297+
error "ERROR: Foldseek is enabled but no database specified. Please set --foldseek_database parameter."
301298
}
302299

300+
// Create channel from database directory path
301+
ch_foldseek_database = Channel.fromPath(params.foldseek_database, type: 'dir', checkIfExists: true).first()
302+
303+
303304
// ====================================================================
304305
// Process Boltz-2 refolded structures
305306
// ====================================================================

0 commit comments

Comments
 (0)