Remove check_max resource caps and make Foldseek use the staged database directory (k8s fix, again).

FloWuenne · FloWuenne · commit 6d7fcf8d4d54 · 2025-11-30T22:06:40.000Z
diff --git a/conf/base.config b/conf/base.config
@@ -10,51 +10,51 @@
 
 process {
 
-    cpus   = { check_max( 1    * task.attempt, 'cpus'   ) }
-    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
-    time   = { check_max( 4.h  * task.attempt, 'time'   ) }
+    cpus   = 1
+    memory = { 6.GB * task.attempt }
+    time   = { 4.h  * task.attempt }
 
     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
     maxErrors     = '-1'
 
     // Process-specific resource requirements
     withLabel:process_single {
-        cpus   = { check_max( 1                  , 'cpus'    ) }
-        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h  * task.attempt, 'time'    ) }
+        cpus   = 1
+        memory = { 6.GB * task.attempt }
+        time   = { 4.h  * task.attempt }
     }
     withLabel:process_low {
-        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h   * task.attempt, 'time'    ) }
+        cpus   = { 2     * task.attempt }
+        memory = { 12.GB * task.attempt }
+        time   = { 4.h   * task.attempt }
     }
     withLabel:process_medium {
-        cpus   = { check_max( 6     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 36.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 8.h   * task.attempt, 'time'    ) }
+        cpus   = { 6     * task.attempt }
+        memory = { 36.GB * task.attempt }
+        time   = { 8.h   * task.attempt }
     }
     withLabel:process_high {
-        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
+        cpus   = { 12    * task.attempt }
+        memory = { 72.GB * task.attempt }
+        time   = { 16.h  * task.attempt }
     }
     withLabel:process_long {
-        time   = { check_max( 20.h  * task.attempt, 'time'    ) }
+        time   = { 20.h  * task.attempt }
     }
     withLabel:process_high_memory {
-        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+        memory = { 200.GB * task.attempt }
     }
     
     // GPU-specific labels for Boltzgen
     withLabel:process_high_gpu {
         // Boltzgen requires GPU and substantial memory
-        cpus          = { check_max( 8     * task.attempt, 'cpus'    ) }
-        memory        = { check_max( 64.GB * task.attempt, 'memory'  ) }
-        time          = { check_max( 48.h  * task.attempt, 'time'    ) }
+        cpus          = { 8     * task.attempt }
+        memory        = { 64.GB * task.attempt }
+        time          = { 48.h  * task.attempt }
         
         // GPU configuration - request 1 GPU by default
-        accelerator   = { check_max( 1, 'gpus' ) }
+        accelerator   = 1
         clusterOptions = { task.ext.clusterOptions ?: '' }
         
         // Container GPU access for Docker
@@ -63,38 +63,38 @@ process {
     
     // GPU label for processes that can optionally use GPU
     withLabel:process_medium_gpu {
-        cpus          = { check_max( 6     * task.attempt, 'cpus'    ) }
-        memory        = { check_max( 36.GB * task.attempt, 'memory'  ) }
-        time          = { check_max( 8.h   * task.attempt, 'time'    ) }
+        cpus          = { 6     * task.attempt }
+        memory        = { 36.GB * task.attempt }
+        time          = { 8.h   * task.attempt }
         
         // GPU configuration - request 1 GPU
-        accelerator   = { check_max( 1, 'gpus' ) }
+        accelerator   = 1
         
         // Container GPU access for Docker
         containerOptions = '--gpus all'
     }
     
     withName:BOLTZGEN_RUN {
         // Extended time for large design runs
-        time          = { check_max( 72.h  * task.attempt, 'time'    ) }
+        time          = { 72.h  * task.attempt }
         
         // Increase memory for large num_designs
-        memory        = {  40.GB * task.attempt }
+        memory        = { 40.GB * task.attempt }
         
         // Request 1 GPU - Boltzgen uses single GPU efficiently
-        accelerator = { check_max( 1, 'gpus' ) }
+        accelerator = 1
         containerOptions = '--gpus all'
     }
     
     withName:PROTEINMPNN_OPTIMIZE {
         // ProteinMPNN can benefit significantly from GPU acceleration
         // The model is PyTorch-based and CUDA-compatible
-        cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 16.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 6.h   * task.attempt, 'time'    ) }
+        cpus   = { 4     * task.attempt }
+        memory = { 16.GB * task.attempt }
+        time   = { 6.h   * task.attempt }
         
         // Request 1 GPU for ProteinMPNN inference
-        accelerator = { check_max( 1, 'gpus' ) }
+        accelerator = 1
         
         // Container GPU access for Docker
         containerOptions = '--gpus all'
@@ -103,20 +103,20 @@ process {
     withName:FOLDSEEK_SEARCH {
         // Foldseek supports GPU acceleration for searches
         // GPU provides 4-27x speedup for structure alignment
-        cpus   = { check_max( 8     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 32.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h   * task.attempt, 'time'    ) }
+        cpus   = { 8     * task.attempt }
+        memory = { 32.GB * task.attempt }
+        time   = { 4.h   * task.attempt }
 
         // Request 1 GPU for accelerated searches
-        accelerator = { check_max( 1, 'gpus' ) }
+        accelerator = 1
 
         // Container GPU access for Docker + override entrypoint to allow bash scripts
         containerOptions = '--gpus all --entrypoint ""'
     }
 
     withName:BOLTZ2_REFOLD {
-        accelerator = { check_max( 1, 'gpus' ) }
-        memory = { check_max( 32.GB * task.attempt, 'memory'  ) }
+        accelerator = 1
+        memory = { 32.GB * task.attempt }
         containerOptions = '--gpus all -e TORCH_FLOAT32_MATMUL_PRECISION=medium'
     }
 }
diff --git a/modules/local/foldseek_search.nf b/modules/local/foldseek_search.nf
@@ -12,31 +12,25 @@ process FOLDSEEK_SEARCH {
 
     input:
     tuple val(meta), path(structure)
-    path database
+    path database_dir
 
     output:
     tuple val(meta), path("${meta.id}_foldseek_results.tsv"), emit: results
     tuple val(meta), path("${meta.id}_foldseek_summary.tsv"), emit: summary
     path "versions.yml", emit: versions
 
     script:
-    def db_path = database.name != 'NO_DATABASE' ? database : params.foldseek_database
     def evalue = params.foldseek_evalue ?: 0.001
     def max_seqs = params.foldseek_max_seqs ?: 100
     def sensitivity = params.foldseek_sensitivity ?: 9.5
     def coverage = params.foldseek_coverage ?: 0.0
     def alignment_type = params.foldseek_alignment_type ?: 2
     def threads = task.cpus
 
-    // Validate database
-    if (!db_path) {
-        error "ERROR: No Foldseek database specified. Please set --foldseek_database parameter."
-    }
-
     """
     /usr/local/bin/foldseek_avx2 easy-search \\
         ${structure} \\
-        ${db_path}/afdb \\
+        ${database_dir}/afdb \\
         ${meta.id}_foldseek_results.tsv \\
         tmp_foldseek \\
         -e ${evalue} \\
diff --git a/nextflow.config b/nextflow.config
@@ -170,42 +170,3 @@ profiles {
         includeConfig 'conf/test_design_protein.config'
     }
 }
-
-// Function to ensure that resource requirements don't go beyond maximum limit
-def check_max(obj, type) {
-    if (type == 'memory') {
-        try {
-            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
-                return params.max_memory as nextflow.util.MemoryUnit
-            else
-                return obj
-        } catch (all) {
-            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'time') {
-        try {
-            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
-                return params.max_time as nextflow.util.Duration
-            else
-                return obj
-        } catch (all) {
-            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'cpus') {
-        try {
-            return Math.min( obj, params.max_cpus as int )
-        } catch (all) {
-            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'gpus') {
-        try {
-            return Math.min( obj, params.max_gpus as int )
-        } catch (all) {
-            println "   ### ERROR ###   Max gpus '${params.max_gpus}' is not valid! Using default value: $obj"
-            return obj
-        }
-    }
-}
diff --git a/workflows/protein_design.nf b/workflows/protein_design.nf
@@ -292,14 +292,15 @@ workflow PROTEIN_DESIGN {
     // Search for structural homologs of both Boltzgen and Protenix structures
     // in the AlphaFold database (or other specified database)
     if (params.run_foldseek) {
-        // Prepare database channel
-        if (params.foldseek_database) {
-            ch_foldseek_database = Channel.fromPath(params.foldseek_database, checkIfExists: true).first()
-        } else {
-            log.warn "⚠️  Foldseek is enabled but no database specified. Please set --foldseek_database parameter."
-            ch_foldseek_database = Channel.value(file('NO_DATABASE'))
+        // Validate and prepare database channel
+        if (!params.foldseek_database) {
+            error "ERROR: Foldseek is enabled but no database specified. Please set --foldseek_database parameter."
         }
         
+        // Create channel from database directory path
+        ch_foldseek_database = Channel.fromPath(params.foldseek_database, type: 'dir', checkIfExists: true).first()
+
+        
         // ====================================================================
         // Process Boltz-2 refolded structures
         // ====================================================================