diff --git a/README.md b/README.md index d5904f2a..cbfd4386 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ On release, automated continuous integration tests run the pipeline on a full-si The pipeline allows you to choose between running either [Bismark](https://github.com/FelixKrueger/Bismark), [bwa-meth](https://github.com/brentp/bwa-meth) / [MethylDackel](https://github.com/dpryan79/methyldackel) or [BWA-Mem](https://github.com/lh3/bwa) plus [rastair](https://bitbucket.org/bsblabludwig/rastair/src/master/) for TAPS data processing. rastair can also be used with bwa-meth aligned reads by setting the aligner to `--aligner bwameth` and adding the flag `--taps`. -Choose between workflows by using `--aligner bismark` (default, uses bowtie2 for alignment), `--aligner bismark_hisat`, `--aligner bwameth` or `--aligner bwamem`. For higher performance, the pipeline can leverage the [Parabricks implementation of bwa-meth (fq2bammeth)](https://docs.nvidia.com/clara/parabricks/latest/documentation/tooldocs/man_fq2bam_meth.html) and the [Parabricks implementation of bwa-mem (fq2bammemh)](https://docs.nvidia.com/clara/parabricks/latest/documentation/tooldocs/man_fq2bam.html), which implement the baseline tools `bwa-meth` and `bwa-mem`. To use this option, include the `gpu` profile along with `--aligner bwameth` or `--aligner bwamem`. +Choose between workflows by using `--aligner bismark` (default, uses bowtie2 for alignment), `--aligner bismark_hisat`, `--aligner bwameth` or `--aligner bwamem`. For higher performance, the pipeline can leverage the [Parabricks implementation of bwa-meth (fq2bammeth)](https://docs.nvidia.com/clara/parabricks/latest/documentation/tooldocs/man_fq2bam_meth.html) and the [Parabricks implementation of bwa-mem (fq2bam)](https://docs.nvidia.com/clara/parabricks/latest/documentation/tooldocs/man_fq2bam.html), which implement the baseline tools `bwa-meth` and `bwa-mem`. 
To use this option, include the `gpu` profile along with `--aligner bwameth` or `--aligner bwamem`. Note: For faster CPU runs with BWA-Meth, enable the BWA-MEM2 algorithm using `--use_mem2`. The GPU pathway (Parabricks) requires `-profile gpu` and a container runtime (Docker, Singularity, or Podman); Conda/Mamba are not supported for the GPU module. diff --git a/conf/modules/parabricks_fq2bam.config b/conf/modules/parabricks_fq2bam.config new file mode 100644 index 00000000..86d1a16c --- /dev/null +++ b/conf/modules/parabricks_fq2bam.config @@ -0,0 +1,11 @@ +process { + withName: PARABRICKS_FQ2BAM { + cache = 'lenient' // This is set because in the module command the index files are touched so as to have bwameth not complain + publishDir = [ + path: { "${params.outdir}/${params.aligner}/alignments" }, + pattern: "*.bam", + mode: params.publish_dir_mode, + enabled: params.save_align_intermeds + ] + } +} diff --git a/conf/modules/parabricks_fq2bammeth.config b/conf/modules/parabricks_fq2bammeth.config index 71e2596a..996bfbd2 100644 --- a/conf/modules/parabricks_fq2bammeth.config +++ b/conf/modules/parabricks_fq2bammeth.config @@ -1,7 +1,6 @@ process { withName: PARABRICKS_FQ2BAMMETH { cache = 'lenient' // This is set because in the module command the index files are touched so as to have bwameth not complain - ext.args = '--low-memory' publishDir = [ path: { "${params.outdir}/${params.aligner}/alignments" }, pattern: "*.bam", diff --git a/conf/subworkflows/fastq_align_dedup_bwamem.config b/conf/subworkflows/fastq_align_dedup_bwamem.config index 63c1b35a..2ba76bfd 100644 --- a/conf/subworkflows/fastq_align_dedup_bwamem.config +++ b/conf/subworkflows/fastq_align_dedup_bwamem.config @@ -1,4 +1,6 @@ + includeConfig "../modules/bwamem_align.config" +includeConfig "../modules/parabricks_fq2bam.config" includeConfig "../modules/picard_addorreplacereadgroups.config" includeConfig "../modules/picard_markduplicates.config" includeConfig "../modules/samtools_stats.config" @@ 
-35,16 +37,4 @@ process { ] ] } - - // SAMTOOLS_INDEX after PICARD_MARKDUPLICATES - publish to deduplicated when deduplication runs - withName: 'NFCORE_METHYLSEQ:METHYLSEQ:FASTQ_ALIGN_DEDUP_BWAMEM:SAMTOOLS_INDEX' { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/deduplicated/" }, - mode: params.publish_dir_mode, - pattern: "*.bai", - enabled: !params.skip_deduplication - ] - ] - } } diff --git a/modules.json b/modules.json index e3a46b32..81ab4bc4 100644 --- a/modules.json +++ b/modules.json @@ -95,6 +95,11 @@ "git_sha": "0b2435805036a16dcdcf21533632d956b8273ac4", "installed_by": ["modules"] }, + "parabricks/fq2bam": { + "branch": "master", + "git_sha": "1e4345f4e18eca1985e35562e8f5d29caf7f8206", + "installed_by": ["fastq_align_dedup_bwamem"] + }, "parabricks/fq2bammeth": { "branch": "master", "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", @@ -221,7 +226,7 @@ "bam_sort_stats_samtools": { "branch": "master", "git_sha": "0b2435805036a16dcdcf21533632d956b8273ac4", - "installed_by": ["fastq_align_bwa"] + "installed_by": ["fastq_align_bwa", "fastq_align_dedup_bwamem"] }, "bam_stats_samtools": { "branch": "master", @@ -255,7 +260,7 @@ }, "fastq_align_dedup_bwamem": { "branch": "master", - "git_sha": "654daca6c7d8a17479d1c56fb5fb8d1663f11428", + "git_sha": "e3d2496d0e47f444e6cd2cfddc9299ceb320c80e", "installed_by": ["subworkflows"] }, "fastq_align_dedup_bwameth": { diff --git a/modules/nf-core/parabricks/fq2bam/main.nf b/modules/nf-core/parabricks/fq2bam/main.nf new file mode 100644 index 00000000..61be1b45 --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/main.nf @@ -0,0 +1,117 @@ +process PARABRICKS_FQ2BAM { + tag "${meta.id}" + label 'process_high' + label 'process_gpu' + // needed by the module to run on a cluster because we need to copy the fasta 
reference, see https://github.com/nf-core/modules/issues/9230 + stageInMode 'copy' + + container "nvcr.io/nvidia/clara/clara-parabricks:4.6.0-1" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(index) + tuple val(meta4), path(interval_file) + tuple val(meta5), path(known_sites) + val output_fmt + + output: + tuple val(meta), path("*.bam"), emit: bam, optional:true + tuple val(meta), path("*.bai"), emit: bai, optional:true + tuple val(meta), path("*.cram"), emit: cram, optional:true + tuple val(meta), path("*.crai"), emit: crai, optional:true + tuple val(meta), path("*.table"), emit: bqsr_table, optional:true + tuple val(meta), path("*_qc_metrics"), emit: qc_metrics, optional:true + tuple val(meta), path("*.duplicate-metrics.txt"), emit: duplicate_metrics, optional:true + path "compatible_versions.yml", emit: compatible_versions, optional:true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def in_fq_command = meta.single_end ? "--in-se-fq ${reads}" : "--in-fq ${reads}" + def extension = "${output_fmt}" + + def known_sites_command = known_sites ? (known_sites instanceof List ? known_sites.collect { "--knownSites ${it}" }.join(' ') : "--knownSites ${known_sites}") : "" + def known_sites_output_cmd = known_sites ? "--out-recal-file ${prefix}.table" : "" + def interval_file_command = interval_file ? (interval_file instanceof List ? interval_file.collect { "--interval-file ${it}" }.join(' ') : "--interval-file ${interval_file}") : "" + + def num_gpus = task.accelerator ? 
"--num-gpus ${task.accelerator.request}" : '' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + cp ${fasta} \$INDEX + + pbrun \\ + fq2bam \\ + --ref \$INDEX \\ + ${in_fq_command} \\ + --out-bam ${prefix}.${extension} \\ + ${known_sites_command} \\ + ${known_sites_output_cmd} \\ + ${interval_file_command} \\ + ${num_gpus} \\ + --bwa-cpu-thread-pool ${task.cpus} \\ + --monitor-usage \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbrun: \$(echo \$(pbrun version 2>&1) | sed 's/^Please.* //' ) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = "${output_fmt}" + def extension_index = "${output_fmt}" == "cram" ? "crai" : "bai" + def known_sites_output = known_sites ? "touch ${prefix}.table" : "" + def qc_metrics_output = args.contains("--out-qc-metrics-dir") ? "mkdir ${prefix}_qc_metrics" : "" + def duplicate_metrics_output = args.contains("--out-duplicate-metrics") ? 
"touch ${prefix}.duplicate-metrics.txt" : "" + """ + touch ${prefix}.${extension} + touch ${prefix}.${extension}.${extension_index} + ${known_sites_output} + ${qc_metrics_output} + ${duplicate_metrics_output} + + # Capture once and build single-line compatible_with (spaces only, no tabs) + pbrun_version_output=\$(pbrun fq2bam --version 2>&1) + + # Because of a space between BWA and mem in the version output this is handled different to the other modules + compat_line=\$(echo "\$pbrun_version_output" | awk -F':' ' + /Compatible With:/ {on=1; next} + /^---/ {on=0} + on && /:/ { + key=\$1; val=\$2 + gsub(/[ \\t]+/, " ", key); gsub(/^[ \\t]+|[ \\t]+\$/, "", key) + gsub(/[ \\t]+/, " ", val); gsub(/^[ \\t]+|[ \\t]+\$/, "", val) + a[++i]=key ": " val + } + END { for (j=1;j<=i;j++) printf "%s%s", (j>1?", ":""), a[j] } + ') + + cat <<EOF > compatible_versions.yml + "${task.process}": + pbrun_version: \$(echo "\$pbrun_version_output" | awk '/^pbrun:/ {print \$2; exit}') + compatible_with: "\$compat_line" + EOF + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbrun: \$(echo \$(pbrun version 2>&1) | sed 's/^Please.* //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/parabricks/fq2bam/meta.yml b/modules/nf-core/parabricks/fq2bam/meta.yml new file mode 100644 index 00000000..b2859d25 --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/meta.yml @@ -0,0 +1,175 @@ +name: "parabricks_fq2bam" +description: NVIDIA Clara Parabricks GPU-accelerated alignment, sorting, BQSR calculation, + and duplicate marking. Note this nf-core module requires files to be copied into + the working directory and not symlinked. 
+keywords: + - align + - sort + - bqsr + - duplicates +tools: + - "parabricks": + description: "NVIDIA Clara Parabricks GPU-accelerated genomics tools" + homepage: "https://www.nvidia.com/en-us/clara/genomics/" + documentation: "https://docs.nvidia.com/clara/parabricks/latest/index.html" + licence: ["custom"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: fastq.gz files + pattern: "*.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: "http://edamontology.org/format_3989" # GZIP format + - - meta2: + type: map + description: | + Groovy Map containing fasta information + - fasta: + type: file + description: reference fasta file - must be unzipped + pattern: "*.fasta" + ontologies: + - edam: "http://edamontology.org/format_1929" # FASTA + - - meta3: + type: map + description: | + Groovy Map containing index information + - index: + type: file + description: reference BWA index + pattern: "*.{amb,ann,bwt,pac,sa}" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing interval file information + - interval_file: + type: file + description: (optional) file(s) containing genomic intervals for use in base + quality score recalibration (BQSR) + pattern: "*.{bed,interval_list,picard,list,intervals}" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing known sites information + - known_sites: + type: file + description: (optional) known sites file(s) for calculating BQSR. markdups must + be true to perform BQSR. + pattern: "*.vcf.gz" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3989" # GZIP + - output_fmt: + type: string + description: Output format for the alignment. 
Options are 'bam' or 'cram' + pattern: "{bam,cram}" +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.bam" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: index corresponding to sorted BAM file + pattern: "*.bai" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.cram" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: index corresponding to sorted CRAM file + pattern: "*.crai" + ontologies: [] + bqsr_table: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.table": + type: file + description: (optional) table from base quality score recalibration calculation, + to be used with parabricks/applybqsr + pattern: "*.table" + ontologies: [] + qc_metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_qc_metrics": + type: directory + description: (optional) optional directory of qc metrics + pattern: "*_qc_metrics" + duplicate_metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.duplicate-metrics.txt": + type: file + description: (optional) metrics calculated from marking duplicates in the + bam file + pattern: "*.duplicate-metrics.txt" + ontologies: [] + compatible_versions: + - compatible_versions.yml: + type: file + description: File containing info on compatible CPU-based software versions. + pattern: "compatible_versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: "http://edamontology.org/format_3750" # YAML +authors: + - "@bsiranosian" + - "@adamrtalbot" +maintainers: + - "@bsiranosian" + - "@adamrtalbot" + - "@gallvp" + - "@famosab" diff --git a/modules/nf-core/parabricks/fq2bam/tests/main.nf.test b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test new file mode 100644 index 00000000..dd63c691 --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test @@ -0,0 +1,316 @@ +nextflow_process { + + name "Test Process PARABRICKS_FQ2BAM" + script "../main.nf" + process "PARABRICKS_FQ2BAM" + + tag "bwa/index" + tag "modules" + tag "parabricks/fq2bam" + tag "modules_nfcore" + tag "parabricks" + tag "gpu" + + config './nextflow.config' + + setup { + run("BWA_INDEX") { + script "../../../bwa/index/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + """ + } + } + + run("BWA_INDEX", alias: 'BWA_INDEX_PE') { + script "../../../bwa/index/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + run("BWA_INDEX", alias: 'BWA_INDEX_CRAM') { + script "../../../bwa/index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + test("SRR389222 - fastq - se") { + + config './nextflow.config' + + when { + params { + module_args = '--low-memory' + // Ref: https://forums.developer.nvidia.com/t/problem-with-gpu/256825/6 + // Parabricks’s fq2bam requires 24GB of memory. + // Using --low-memory for testing + bwa_prefix = 'genome.fa' + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = BWA_INDEX.out.index + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("SRR389222 - fastq - se - stub") { + + options '-stub' + + when { + params { + module_args = '' + bwa_prefix = '' + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = BWA_INDEX.out.index + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions.get(0)).yaml, + path(process.out.compatible_versions.get(0)).yaml + 
).match() } + ) + } + } + + test("sarscov2 - fastq - pe") { + + when { + params { + module_args = '--low-memory' + // Ref: https://forums.developer.nvidia.com/t/problem-with-gpu/256825/6 + // Parabricks’s fq2bam requires 24GB of memory. + // Using --low-memory for testing + bwa_prefix = 'genome.fasta' + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = BWA_INDEX_PE.out.index + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + + test("sarscov2 - fastq - pe - stub") { + + options '-stub' + + when { + params { + module_args = '' + bwa_prefix = '' + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = BWA_INDEX_PE.out.index + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + 
path(process.out.versions.get(0)).yaml, + path(process.out.compatible_versions.get(0)).yaml + ).match() } + ) + } + + } + + test("sarscov2 - fastq - se - cram") { + + when { + params { + module_args = '--low-memory' + // Ref: https://forums.developer.nvidia.com/t/problem-with-gpu/256825/6 + // Parabricks’s fq2bam requires 24GB of memory. + // Using --low-memory for testing + bwa_prefix = 'genome.fasta' + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[2] = BWA_INDEX_CRAM.out.index + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = 'cram' + """ + } + } + + then { + def fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta" + assertAll( + { assert process.success }, + { assert snapshot( + cram( + process.out.cram[0][1], + fasta, + ).getReadsMD5(), + file(process.out.crai[0][1]).name, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + + test("sarscov2 - fastq - pe - cram - stub") { + + options '-stub' + + when { + params { + module_args = '' + bwa_prefix = '' + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = BWA_INDEX_PE.out.index + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = 'cram' + """ + 
} + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/parabricks/fq2bam/tests/main.nf.test.snap b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test.snap new file mode 100644 index 00000000..aa6fef3c --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test.snap @@ -0,0 +1,351 @@ +{ + "SRR389222 - fastq - se - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + "compatible_versions.yml:md5,01a6599f855da953f58c26a0c1df301e" + ], + "8": [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ], + "bai": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bqsr_table": [ + + ], + "compatible_versions": [ + "compatible_versions.yml:md5,01a6599f855da953f58c26a0c1df301e" + ], + "crai": [ + + ], + "cram": [ + + ], + "duplicate_metrics": [ + + ], + "qc_metrics": [ + + ], + "versions": [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ] + }, + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.6.0-1" + } + }, + { + "PARABRICKS_FQ2BAM": null, + "pbrun_version": "4.6.0-1", + "compatible_with": "BWA mem: 0.7.15, Picard: 2.18.25" + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-10-13T16:46:45.735206352" + }, + "sarscov2 - fastq - pe - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { 
+ "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + "compatible_versions.yml:md5,01a6599f855da953f58c26a0c1df301e" + ], + "8": [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bqsr_table": [ + + ], + "compatible_versions": [ + "compatible_versions.yml:md5,01a6599f855da953f58c26a0c1df301e" + ], + "crai": [ + + ], + "cram": [ + + ], + "duplicate_metrics": [ + + ], + "qc_metrics": [ + + ], + "versions": [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ] + }, + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.6.0-1" + } + }, + { + "PARABRICKS_FQ2BAM": null, + "pbrun_version": "4.6.0-1", + "compatible_with": "BWA mem: 0.7.15, Picard: 2.18.25" + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-10-13T16:47:45.14187671" + }, + "sarscov2 - fastq - pe": { + "content": [ + "2d64e4363d9f3c0e2167fce49d5087cf", + "test.bam.bai", + [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ], + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.6.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-10-13T16:47:32.891223581" + }, + "sarscov2 - fastq - se - cram": { + "content": [ + "30c325e1e032eb1782a280d34c0fb1c7", + "test.cram.crai", + [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ], + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.6.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-10-13T16:48:32.435485918" + }, + "sarscov2 - fastq - pe - cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + 
"single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + "compatible_versions.yml:md5,01a6599f855da953f58c26a0c1df301e" + ], + "8": [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ], + "bai": [ + + ], + "bam": [ + + ], + "bqsr_table": [ + + ], + "compatible_versions": [ + "compatible_versions.yml:md5,01a6599f855da953f58c26a0c1df301e" + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "duplicate_metrics": [ + + ], + "qc_metrics": [ + + ], + "versions": [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ] + }, + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.6.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-10-13T16:48:43.255740964" + }, + "SRR389222 - fastq - se": { + "content": [ + "3d5b94990c7fdf90a682edb5ee0f59de", + "test.bam.bai", + [ + "versions.yml:md5,0c47791a7efece8e797f064b663ef855" + ], + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.6.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-10-13T16:46:33.272405432" + } +} \ No newline at end of file diff --git a/modules/nf-core/parabricks/fq2bam/tests/nextflow.config b/modules/nf-core/parabricks/fq2bam/tests/nextflow.config new file mode 100644 index 00000000..20585ddc --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + + withName: 'PARABRICKS_FQ2BAM' { + ext.args = params.module_args + } + + withName: 'BWA_INDEX' { + ext.prefix = params.bwa_prefix + } + +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf 
b/subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf old mode 100644 new mode 100755 index 026bbf6f..83dbfb3b --- a/subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf +++ b/subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf @@ -1,6 +1,8 @@ -include { FASTQ_ALIGN_BWA } from '../fastq_align_bwa/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main' +include { FASTQ_ALIGN_BWA } from '../../nf-core/fastq_align_bwa/main' include { PICARD_ADDORREPLACEREADGROUPS } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { PARABRICKS_FQ2BAM } from '../../../modules/nf-core/parabricks/fq2bam/main' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' workflow FASTQ_ALIGN_DEDUP_BWAMEM { @@ -11,29 +13,61 @@ workflow FASTQ_ALIGN_DEDUP_BWAMEM { ch_fasta_index // channel: [ val(meta), [ fasta index ] ] ch_bwamem_index // channel: [ val(meta), [ bwamem index ] ] skip_deduplication // boolean: whether to deduplicate alignments + use_gpu // boolean: whether to use GPU accelerated alignment + output_fmt // string: output format for parabricks fq2bam (e.g., 'bam' or 'cram') + interval_file // channel: [ val(meta), [ interval file ] ] + known_sites // channel: [ val(meta), [ known sites ] ] main: - ch_alignment = channel.empty() - ch_alignment_index = channel.empty() - ch_flagstat = channel.empty() - ch_stats = channel.empty() - ch_idxstats = channel.empty() - ch_picard_metrics = channel.empty() - ch_multiqc_files = channel.empty() - ch_versions = channel.empty() - FASTQ_ALIGN_BWA ( - ch_reads, - ch_bwamem_index, - true, // val_sort_bam hardcoded to true - ch_fasta - ) - ch_alignment = FASTQ_ALIGN_BWA.out.bam // channel: [ val(meta), [ bam ] ] + ch_alignment = channel.empty() + ch_alignment_index = channel.empty() + ch_flagstat = channel.empty() + ch_stats = channel.empty() + ch_idxstats = 
channel.empty() + ch_picard_metrics = channel.empty() + ch_multiqc_files = channel.empty() + ch_versions = channel.empty() + if (use_gpu) { + /* + * Align with parabricks GPU enabled fq2bam implementation of bwa-mem + */ + PARABRICKS_FQ2BAM ( + ch_reads, // channel: [ val(meta), [ reads ] ] + ch_fasta, // channel: [ val(meta), [ fasta ] ] + ch_bwamem_index, // channel: [ val(meta), [ bwamem index ] ] + interval_file, // channel: [ val(meta), [ interval file ] ] + known_sites, // channel: [ val(meta), [ known sites ] ] + output_fmt // string: output format + ) + ch_alignment = PARABRICKS_FQ2BAM.out.bam + ch_versions = ch_versions.mix(PARABRICKS_FQ2BAM.out.versions.first()) + + BAM_SORT_STATS_SAMTOOLS ( + ch_alignment, + ch_fasta + ) + ch_alignment = BAM_SORT_STATS_SAMTOOLS.out.bam + ch_alignment_index = BAM_SORT_STATS_SAMTOOLS.out.bai + ch_stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + ch_flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + ch_idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) + } + else { + FASTQ_ALIGN_BWA ( + ch_reads, + ch_bwamem_index, + true, // val_sort_bam hardcoded to true + ch_fasta + ) + ch_alignment = FASTQ_ALIGN_BWA.out.bam // channel: [ val(meta), [ bam ] ] ch_alignment_index = FASTQ_ALIGN_BWA.out.bai // channel: [ val(meta), [ bai ] ] - ch_stats = FASTQ_ALIGN_BWA.out.stats // channel: [ val(meta), path(stats) ] - ch_flagstat = FASTQ_ALIGN_BWA.out.flagstat // channel: [ val(meta), path(flagstat) ] - ch_idxstats = FASTQ_ALIGN_BWA.out.idxstats // channel: [ val(meta), path(idxstats) ] - ch_versions = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first()) + ch_stats = FASTQ_ALIGN_BWA.out.stats // channel: [ val(meta), path(stats) ] + ch_flagstat = FASTQ_ALIGN_BWA.out.flagstat // channel: [ val(meta), path(flagstat) ] + ch_idxstats = 
FASTQ_ALIGN_BWA.out.idxstats // channel: [ val(meta), path(idxstats) ] + ch_versions = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first()) + } if (!skip_deduplication) { /* @@ -85,4 +119,4 @@ workflow FASTQ_ALIGN_DEDUP_BWAMEM { picard_metrics = ch_picard_metrics // channel: [ val(meta), [ metrics ] ] multiqc = ch_multiqc_files // channel: [ *{html,txt} ] versions = ch_versions // channel: [ versions.yml ] -} +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml b/subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml old mode 100644 new mode 100755 index fd6670e2..d38dae81 --- a/subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml +++ b/subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml @@ -1,6 +1,7 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "fastq_align_dedup_bwamem" -description: Performs alignment of DNA or TAPS-treated reads using bwamem, sort and deduplicate +description: Performs alignment of DNA or TAPS-treated reads using bwamem or + parabricks/fq2bam, sort and deduplicate keywords: - bwamem - alignment @@ -11,9 +12,11 @@ keywords: - fastq - bam components: + - parabricks/fq2bam - samtools/index - picard/addorreplacereadgroups - picard/markduplicates + - bam_sort_stats_samtools - fastq_align_bwa input: - ch_reads: @@ -41,6 +44,24 @@ input: type: boolean description: | Skip deduplication of aligned reads + - use_gpu: + type: boolean + description: | + Align with the GPU-accelerated parabricks/fq2bam module instead of the fastq_align_bwa subworkflow + - output_fmt: + type: string + description: Output format for the alignment. 
Options are 'bam' or 'cram' + pattern: "{bam,cram}" + - interval_file: + type: file + description: | + Structure: [ val(meta), path(interval file) ] + pattern: "*.{bed,intervals}" + - known_sites: + type: file + description: | + Structure: [ val(meta), path(known sites) ] + pattern: "*.{vcf,vcf.gz}" output: - bam: type: file @@ -84,12 +105,6 @@ output: Channel containing files for MultiQC input (metrics, stats, flagstat, idxstats). Structure: [ path(file) ] pattern: "*{.txt,.stats,.flagstat,.idxstats}" - - versions: - type: file - description: | - File containing software versions - Structure: [ path(versions.yml) ] - pattern: "versions.yml" authors: - "@eduard-watchmaker" maintainers: diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test new file mode 100755 index 00000000..1ede2464 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test @@ -0,0 +1,213 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMEM" + script "../main.nf" + workflow "FASTQ_ALIGN_DEDUP_BWAMEM" + config "./nextflow.config" + + tag "gpu" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_dedup_bwamem" + tag "parabricks/fq2bam" + tag "samtools/index" + tag "picard/markduplicates" + tag "bwa" + tag "bwa/index" + tag "parabricks/fq2bam" + tag "samtools" + tag "samtools/sort" + tag "samtools/index" + tag "samtools/idxstats" + tag "samtools/flagstat" + tag "samtools/stats" + tag "bam_sort_stats_samtools" + tag "picard/markduplicates" + tag "picard/addorreplacereadgroups" + + setup { + run("BWA_INDEX") { + script "../../../../modules/nf-core/bwa/index/main.nf" + process { + """ + input[0] = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + + test("Sarscov2 fasta - SE - deduplicate - with GPU parabricks/fq2bam") { + when { + workflow 
{ + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + input[3] = BWA_INDEX.out.index + input[4] = false // skip_deduplication + input[5] = true // use_gpu + input[6] = "bam" // output_fmt + input[7] = Channel.of([[:], []]) // interval_file + input[8] = Channel.of([[:], []]) // known_sites + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.samtools_index_stats, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() + } + ) + } + } + + test("Sarscov2 fasta - SE - skip deduplication - with GPU parabricks/fq2bam") { + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + input[3] = BWA_INDEX.out.index + input[4] = true // skip_deduplication + input[5] = true // use_gpu + input[6] = "bam" // output_fmt + input[7] = Channel.of([[:], []]) 
// interval_file + input[8] = Channel.of([[:], []]) // known_sites + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.samtools_index_stats, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() + } + ) + } + } + + test("Sarscov2 fasta - PE - skip deduplication - with GPU parabricks/fq2bam") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + input[3] = BWA_INDEX.out.index + input[4] = true // skip_deduplication + input[5] = true // use_gpu + input[6] = "bam" // output_fmt + input[7] = Channel.of([[:], []]) // interval_file + input[8] = Channel.of([[:], []]) // known_sites + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.samtools_index_stats, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path 
-> file(path).name }, + workflow.out.versions + ).match() + } + ) + } + } + + test("Sarscov2 fasta - SE - skip deduplication - with GPU parabricks/fq2bam - stub") { + options '-stub' + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + input[3] = BWA_INDEX.out.index + input[4] = false // skip_deduplication + input[5] = true // use_gpu + input[6] = "bam" // output_fmt + input[7] = Channel.of([[:], []]) // interval_file + input[8] = Channel.of([[:], []]) // known_sites + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.versions.collect{ path(it).yaml } + ).match() + } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test.snap new file mode 100644 index 00000000..0c0ce9d5 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test.snap @@ -0,0 +1,332 @@ +{ + "Sarscov2 fasta - SE - deduplicate - with GPU parabricks/fq2bam": { + "content": [ + [ + "b4ac761f4117f0c95693ce61825aa03d" + ], + [ + "test.deduped.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,1d27922f8027430ae3055f7e7fc1ce36" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,f65552b84f1a36c9ded508c03a493158" + ] + ], + null, + [ + "test.deduped.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.deduped.sorted.MarkDuplicates.metrics.txt", + "test.flagstat", + "test.idxstats", + "test.stats" + ], + [ 
+ "versions.yml:md5,33ef2596639d7968f743b8cdb697a274", + "versions.yml:md5,6fa0c192669339220d5c5735739188ac", + "versions.yml:md5,71928d7ca7a14123b4b025bab027d798", + "versions.yml:md5,c8ad9ea35566f100e41588f4c271b663", + "versions.yml:md5,e0c30eb274e33f3086ff9e694aae2161" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-12-03T20:40:01.700398606" + }, + "Sarscov2 fasta - PE - skip deduplication - with GPU parabricks/fq2bam": { + "content": [ + [ + "2d64e4363d9f3c0e2167fce49d5087cf" + ], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,75934f2a51780a80d2ab4674301a018d" + ] + ], + null, + [ + + ], + [ + "test.flagstat", + "test.idxstats", + "test.stats" + ], + [ + "versions.yml:md5,33ef2596639d7968f743b8cdb697a274", + "versions.yml:md5,71928d7ca7a14123b4b025bab027d798" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-12-03T20:41:36.509584972" + }, + "Sarscov2 fasta - SE - skip deduplication - with GPU parabricks/fq2bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.deduped.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.deduped.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.deduped.sorted.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + "test.deduped.sorted.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ], + [ + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,33ef2596639d7968f743b8cdb697a274", + "versions.yml:md5,6fa0c192669339220d5c5735739188ac", + "versions.yml:md5,71928d7ca7a14123b4b025bab027d798", + "versions.yml:md5,c8ad9ea35566f100e41588f4c271b663", + "versions.yml:md5,e0c30eb274e33f3086ff9e694aae2161" + ], + "bai": [ + [ + { + "id": "test", + "single_end": true + }, + "test.deduped.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.deduped.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "multiqc": [ + [ + "test.deduped.sorted.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ], + [ + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "picard_metrics": [ + [ + { + "id": "test", + "single_end": true + }, + "test.deduped.sorted.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "samtools_flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "samtools_idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "samtools_stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,33ef2596639d7968f743b8cdb697a274", + "versions.yml:md5,6fa0c192669339220d5c5735739188ac", + 
"versions.yml:md5,71928d7ca7a14123b4b025bab027d798", + "versions.yml:md5,c8ad9ea35566f100e41588f4c271b663", + "versions.yml:md5,e0c30eb274e33f3086ff9e694aae2161" + ] + }, + [ + { + "FASTQ_ALIGN_DEDUP_BWAMEM:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX": { + "samtools": "1.22.1" + } + }, + { + "FASTQ_ALIGN_DEDUP_BWAMEM:PICARD_MARKDUPLICATES": { + "picard": "3.4.0" + } + }, + { + "FASTQ_ALIGN_DEDUP_BWAMEM:PARABRICKS_FQ2BAM": { + "pbrun": "4.6.0-1" + } + }, + { + "FASTQ_ALIGN_DEDUP_BWAMEM:SAMTOOLS_INDEX": { + "samtools": "1.22.1" + } + }, + { + "FASTQ_ALIGN_DEDUP_BWAMEM:PICARD_ADDORREPLACEREADGROUPS": { + "picard": "3.4.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-12-03T20:41:52.090425476" + }, + "Sarscov2 fasta - SE - skip deduplication - with GPU parabricks/fq2bam": { + "content": [ + [ + "30c325e1e032eb1782a280d34c0fb1c7" + ], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,1d27922f8027430ae3055f7e7fc1ce36" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,f65552b84f1a36c9ded508c03a493158" + ] + ], + null, + [ + + ], + [ + "test.flagstat", + "test.idxstats", + "test.stats" + ], + [ + "versions.yml:md5,33ef2596639d7968f743b8cdb697a274", + "versions.yml:md5,71928d7ca7a14123b4b025bab027d798" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-12-03T20:40:48.92955311" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test old mode 100644 new mode 100755 index 56c996c5..6debe9a0 --- a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test @@ -9,29 +9,29 @@ nextflow_workflow { tag "subworkflows_nfcore" tag "subworkflows/fastq_align_dedup_bwamem" tag "subworkflows/fastq_align_bwa" + tag 
"bwa" tag "bwa/index" tag "bwa/mem" + tag "parabricks/fq2bam" tag "samtools" tag "samtools/sort" tag "samtools/index" + tag "samtools/idxstats" tag "samtools/flagstat" tag "samtools/stats" - tag "samtools/idxstats" tag "bam_sort_stats_samtools" tag "fastq_align_bwa" tag "picard/markduplicates" tag "picard/addorreplacereadgroups" - tag "untar" setup { run("BWA_INDEX") { script "../../../../modules/nf-core/bwa/index/main.nf" process { """ - input[0] = [ - [ id:'genome' ], + input[0] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] + ]) """ } } @@ -56,7 +56,11 @@ nextflow_workflow { file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]) input[3] = BWA_INDEX.out.index - input[4] = false // skip_deduplication + input[4] = false // skip_deduplication + input[5] = false // use_gpu + input[6] = "bam" // output_fmt + input[7] = [[:], [] ] // interval_file + input[8] = [[:], [] ] // known_sites """ } } @@ -98,7 +102,11 @@ nextflow_workflow { file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]) input[3] = BWA_INDEX.out.index - input[4] = false // skip_deduplication + input[4] = false // skip_deduplication + input[5] = false // use_gpu + input[6] = "bam" // output_fmt + input[7] = [[:], [] ] // interval_file + input[8] = [[:], [] ] // known_sites """ } } @@ -140,7 +148,11 @@ nextflow_workflow { file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]) input[3] = BWA_INDEX.out.index - input[4] = true // skip_deduplication + input[4] = true // skip_deduplication + input[5] = false // use_gpu + input[6] = "bam" // output_fmt + input[7] = [[:], [] ] // interval_file + input[8] = [[:], [] ] // known_sites """ } } @@ -181,7 +193,11 @@ nextflow_workflow { file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]) input[3] = BWA_INDEX.out.index - input[4] = false // skip_deduplication + input[4] = false // skip_deduplication + input[5] = false // use_gpu + input[6] = "bam" // output_fmt + input[7] = [[:], [] ] // interval_file + input[8] = [[:], [] ] // known_sites """ } } @@ -189,12 +205,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot( + { assert snapshot( workflow.out, workflow.out.versions.collect{ path(it).yaml } ).match() } ) } } - } diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test.snap index 5ee3a073..46fda1f8 100644 --- a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test.snap @@ -54,7 +54,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-12-03T12:27:38.204299" + "timestamp": "2025-11-03T18:07:22.444845091" }, "Params: bwamem paired-end - skip_deduplication": { "content": [ @@ -107,7 +107,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-12-03T12:28:09.085652" + "timestamp": "2025-11-03T18:07:48.375738382" }, "Params: bwamem single-end - default - stub": { "content": [ @@ -289,7 +289,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-12-03T12:28:25.358403" + "timestamp": "2025-11-03T18:08:01.772454395" }, "Params: bwamem paired-end - default": { "content": [ @@ -346,6 +346,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-12-03T12:27:56.314782" + "timestamp": "2025-11-03T18:07:38.589146631" } } \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/nextflow.config b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/nextflow.config old mode 100644 new mode 100755 index 755ba1b3..bd5ccae7 --- 
a/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/nextflow.config +++ b/subworkflows/nf-core/fastq_align_dedup_bwamem/tests/nextflow.config @@ -1,4 +1,7 @@ process { + withName: 'PARABRICKS_FQ2BAM' { + ext.args = '--low-memory' + } withName: 'SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } } diff --git a/workflows/methylseq/main.nf b/workflows/methylseq/main.nf index 9630375d..e0d0222d 100644 --- a/workflows/methylseq/main.nf +++ b/workflows/methylseq/main.nf @@ -60,6 +60,10 @@ workflow METHYLSEQ { ch_preseq = channel.empty() ch_multiqc_files = channel.empty() + // Convert reference channels to value channels (via first()) so they can be reused + ch_fasta_val = ch_fasta.first() + ch_fasta_index_val = ch_fasta_index.first() + // // Branch channels from input samplesheet channel // @@ -174,28 +178,34 @@ workflow METHYLSEQ { else if (params.aligner == 'bwamem'){ ch_bwamem_inputs = ch_reads - .combine(ch_fasta) - .combine(ch_fasta_index) - .combine(ch_bwamem_index) - .multiMap { meta, reads, meta_fasta, fasta, meta_fasta_index, fasta_index, meta_bwamem, bwamem_index -> - reads: [ meta, reads ] - fasta: [ meta_fasta, fasta ] - fasta_index: [ meta_fasta_index, fasta_index ] - bwamem_index: [ meta_bwamem, bwamem_index ] + .combine(ch_fasta_val) + .combine(ch_fasta_index_val) + .combine(ch_bwamem_index.first()) + .multiMap { meta, reads, _mf, fasta, _mi, fasta_index, _mb, bwamem_index -> + reads: [ meta, reads ] + fasta: [ meta, fasta ] + fasta_index: [ meta, fasta_index ] + bwamem_index: [ meta, bwamem_index ] } + ch_bwamem_inputs.reads.view { "BWAMEM input: ${it[0].id}" } + FASTQ_ALIGN_DEDUP_BWAMEM ( ch_bwamem_inputs.reads, ch_bwamem_inputs.fasta, ch_bwamem_inputs.fasta_index, ch_bwamem_inputs.bwamem_index, - params.skip_deduplication + params.skip_deduplication, + workflow.profile.tokenize(',').intersect(['gpu']).size() >= 1, + "bam", // output_fmt + Channel.of([[:], []]).collect(), // interval_file + Channel.of([[:], []]).collect(), // known_sites ) ch_bam = 
FASTQ_ALIGN_DEDUP_BWAMEM.out.bam ch_bai = FASTQ_ALIGN_DEDUP_BWAMEM.out.bai ch_aligner_mqc = FASTQ_ALIGN_DEDUP_BWAMEM.out.multiqc - ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BWAMEM.out.versions.unique{ it.baseName }) + ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BWAMEM.out.versions) } else {