nf-core · eduard-watchmaker · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf b/subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf
@@ -1,6 +1,8 @@
-include { FASTQ_ALIGN_BWA                                   } from '../fastq_align_bwa/main'
+include { BAM_SORT_STATS_SAMTOOLS                           } from '../../nf-core/bam_sort_stats_samtools/main'
+include { FASTQ_ALIGN_BWA                                   } from '../../nf-core/fastq_align_bwa/main'
 include { PICARD_ADDORREPLACEREADGROUPS                     } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
 include { PICARD_MARKDUPLICATES                             } from '../../../modules/nf-core/picard/markduplicates/main'  
+include { PARABRICKS_FQ2BAM                                 } from '../../../modules/nf-core/parabricks/fq2bam/main'
 include { SAMTOOLS_INDEX                                    } from '../../../modules/nf-core/samtools/index/main'
 
 workflow FASTQ_ALIGN_DEDUP_BWAMEM {
@@ -12,6 +14,8 @@ workflow FASTQ_ALIGN_DEDUP_BWAMEM {
     ch_bwamem_index      // channel: [ val(meta), [ bwam index ] ]
     skip_deduplication   // boolean: whether to deduplicate alignments
     use_gpu              // boolean: whether to use GPU or CPU for bwamem alignment
+    interval_file        // channel: [ val(meta), [ interval file ] ]
+    known_sites          // channel: [ val(meta), [ known sites ] ]
 
     main:
 
@@ -23,18 +27,46 @@ workflow FASTQ_ALIGN_DEDUP_BWAMEM {
     ch_multiqc_files                 = Channel.empty()
     ch_versions                      = Channel.empty()
 
-    FASTQ_ALIGN_BWA (
-        ch_reads,
-        ch_bwamem_index,
-        true, // val_sort_bam hardcoded to true
-        ch_fasta
-    )
-    ch_alignment        = ch_alignment.mix(FASTQ_ALIGN_BWA.out.bam)
-    ch_alignment_index  = ch_alignment.mix(FASTQ_ALIGN_BWA.out.bai)
-    ch_stats            = ch_alignment.mix(FASTQ_ALIGN_BWA.out.stats)    // channel: [ val(meta), path(stats) ]
-    ch_flagstat         = ch_alignment.mix(FASTQ_ALIGN_BWA.out.flagstat) // channel: [ val(meta), path(flagstat) ]
-    ch_idxstats         = ch_alignment.mix(FASTQ_ALIGN_BWA.out.idxstats) // channel: [ val(meta), path(idxstats) ]
-    ch_versions         = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first())
+    if (use_gpu) {
+        /*
+        * Align with the GPU-enabled Parabricks fq2bam module (used in place of CPU bwa mem)
+        */
+        PARABRICKS_FQ2BAM (
+            ch_reads,
+            ch_fasta,
+            ch_bwamem_index,
+            interval_file, // interval file
+            known_sites, // known sites
+            'bam' // output format
+        )
+        ch_alignment = PARABRICKS_FQ2BAM.out.bam
+        ch_versions  = ch_versions.mix(PARABRICKS_FQ2BAM.out.versions.first())
+        // Run BAM_SORT_STATS_SAMTOOLS on the fq2bam output to obtain the bam, bai and stats channels
+        BAM_SORT_STATS_SAMTOOLS (
+            ch_alignment,
+            ch_fasta
+        )
+        ch_alignment        = BAM_SORT_STATS_SAMTOOLS.out.bam
+        ch_alignment_index  = BAM_SORT_STATS_SAMTOOLS.out.bai
+        ch_stats            = BAM_SORT_STATS_SAMTOOLS.out.stats    // channel: [ val(meta), path(stats) ]
+        ch_flagstat         = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
+        ch_idxstats         = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
+        ch_versions         = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first())
+    }
+    else {
+        FASTQ_ALIGN_BWA (
+            ch_reads,
+            ch_bwamem_index,
+            true, // val_sort_bam hardcoded to true
+            ch_fasta
+        )
+        ch_alignment        = ch_alignment.mix(FASTQ_ALIGN_BWA.out.bam)
+        ch_alignment_index  = FASTQ_ALIGN_BWA.out.bai      // channel: [ val(meta), path(bai) ]; assigned directly so BAMs from ch_alignment do not leak in
+        ch_stats            = FASTQ_ALIGN_BWA.out.stats    // channel: [ val(meta), path(stats) ]
+        ch_flagstat         = FASTQ_ALIGN_BWA.out.flagstat // channel: [ val(meta), path(flagstat) ]
+        ch_idxstats         = FASTQ_ALIGN_BWA.out.idxstats // channel: [ val(meta), path(idxstats) ]
+        ch_versions         = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first())
+    }
 
     if (!skip_deduplication) {
         /*

diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml b/subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml
@@ -1,6 +1,6 @@
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
 name: "fastq_align_dedup_bwamem"
-description: Performs alignment of DNA or TAPS-treated reads using bwamem, sort and deduplicate
+description: Performs alignment of DNA or TAPS-treated reads using bwamem or parabricks/fq2bam, sort and deduplicate
 keywords:
   - bwamem
   - alignment
@@ -11,9 +11,11 @@ keywords:
   - fastq
   - bam
 components:
+  - parabricks/fq2bam
   - samtools/index
   - picard/addorreplacereadgroups
   - picard/markduplicates
+  - bam_sort_stats_samtools
   - fastq_align_bwa
 input:
   - ch_reads:
@@ -41,6 +43,20 @@ input:
       type: boolean
       description: |
         Skip deduplication of aligned reads
+  - use_gpu:
+      type: boolean
+      description: |
+        Align on GPU with parabricks/fq2bam when true; align on CPU with bwa mem when false
+  - interval_file:
+      type: file
+      description: |
+        Structure: [ val(meta), path(interval file) ]
+      pattern: "*.{bed,intervals}"
+  - known_sites:
+      type: file
+      description: |
+        Structure: [ val(meta), path(known sites) ]
+      pattern: "*.{vcf,vcf.gz}"
 output:
   - bam:
       type: file

diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/nextflow.config b/subworkflows/nf-core/fastq_align_dedup_bwamem/nextflow.config
@@ -1,4 +1,3 @@
-// IMPORTANT: This config file should be included to ensure that the subworkflow works properly.
 process {
     withName: 'SAMTOOLS_SORT' {
         ext.prefix = { "${meta.id}.sorted" }

diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test
@@ -0,0 +1,224 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMEM"
+    script "../main.nf"
+    workflow "FASTQ_ALIGN_DEDUP_BWAMEM"
+    config "./nextflow.config"
+
+    tag "gpu"
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fastq_align_dedup_bwamem"
+    tag "parabricks/fq2bam"
+    tag "samtools/index"
+    tag "picard/markduplicates"
+    tag "bwa/index" // module run by the setup block to build the index passed as input[3]
+
+    setup {
+        run("BWA_INDEX") { // builds the BWA index that the single-end tests pass as input[3]
+            script "../../../../modules/nf-core/bwa/index/main.nf"
+            process { // NOTE(review): indexes the methylseq genome.fa while the tests pass the sarscov2 genome.fasta as input[1] — confirm this mismatch is intended
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        run("BWA_INDEX", alias: 'BWA_INDEX_PE') { // same module and same input as BWA_INDEX above; its output is used by the paired-end test
+            script "../../../../modules/nf-core/bwa/index/main.nf"
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true)
+                ])
+                """
+            }
+        }
+    }
+
+    test("Params: parabricks/fq2bam single-end | use_gpu ") { // single-end reads, GPU alignment, deduplication enabled
+        when {
+            params {
+                use_gpu    = true
+                bwa_prefix = 'genome.fa'
+            }
+            workflow {
+                """
+                input[0] = Channel.of([
+                            [ id:'test', single_end:true ],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                input[1] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+                ])
+                input[3] = BWA_INDEX.out.index
+                input[4] = false // skip_deduplication
+                input[5] = true // use_gpu
+                input[6] = [ [], [] ] // interval_file: none (Channel.empty() would prevent PARABRICKS_FQ2BAM from running)
+                input[7] = [ [], [] ] // known_sites: none (same reason)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, // compare read content, not the BAM bytes
+                    workflow.out.bai.collect { meta, bai -> file(bai).name }, // file name only
+                    workflow.out.samtools_flagstat,
+                    workflow.out.samtools_stats,
+                    workflow.out.samtools_index_stats,
+                    workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, // file name only
+                    workflow.out.multiqc.flatten().collect { path -> file(path).name }, // file names only
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("Params: parabricks/fq2bam single-end | use_gpu | skip_deduplication") { // single-end reads, GPU alignment, deduplication skipped
+        when {
+            params {
+                skip_deduplication = true
+                use_gpu            = true
+                bwa_prefix = 'genome.fa'
+            }
+            workflow {
+                """
+                input[0] = Channel.of([
+                            [ id:'test', single_end:true ],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                input[1] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+                ])
+                input[3] = BWA_INDEX.out.index
+                input[4] = true // skip_deduplication
+                input[5] = true // use_gpu
+                input[6] = [ [], [] ] // interval_file: none (Channel.empty() would prevent PARABRICKS_FQ2BAM from running)
+                input[7] = [ [], [] ] // known_sites: none (same reason)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, // compare read content, not the BAM bytes
+                    workflow.out.bai.collect { meta, bai -> file(bai).name }, // file name only
+                    workflow.out.samtools_flagstat,
+                    workflow.out.samtools_stats,
+                    workflow.out.samtools_index_stats,
+                    workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, // file name only
+                    workflow.out.multiqc.flatten().collect { path -> file(path).name }, // file names only
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("Params: parabricks/fq2bam single-end | use_gpu | stub") { // stub run of the GPU path with deduplication enabled
+        options '-stub'
+        when {
+
+            workflow {
+                """
+                input[0] = Channel.of([
+                            [ id:'test', single_end:true ],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                input[1] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+                ])
+                input[3] = BWA_INDEX.out.index
+                input[4] = false // skip_deduplication
+                input[5] = true // use_gpu
+                input[6] = [ [], [] ] // interval_file: none (Channel.empty() would prevent PARABRICKS_FQ2BAM from running)
+                input[7] = [ [], [] ] // known_sites: none (same reason)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out).match() } // the whole output map is snapshotted for the stub run
+            )
+        }
+    }
+
+    test("Params: parabricks/fq2bam paired-end | use_gpu | skip_deduplication") { // paired-end reads, GPU alignment, deduplication skipped
+
+        when {
+            params {
+                skip_deduplication = true
+                use_gpu            = true
+                bwa_prefix = 'genome.fa'
+            }
+            workflow {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = Channel.of([
+                            [:],
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+                ])
+                input[3] = BWA_INDEX_PE.out.index
+                input[4] = true // skip_deduplication
+                input[5] = true // use_gpu
+                input[6] = [ [], [] ] // interval_file: none (Channel.empty() would prevent PARABRICKS_FQ2BAM from running)
+                input[7] = [ [], [] ] // known_sites: none (same reason)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, // compare read content, not the BAM bytes
+                    workflow.out.bai.collect { meta, bai -> file(bai).name }, // file name only
+                    workflow.out.samtools_flagstat,
+                    workflow.out.samtools_stats,
+                    workflow.out.samtools_index_stats,
+                    workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, // file name only
+                    workflow.out.multiqc.flatten().collect { path -> file(path).name }, // file names only
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+}