Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 45 additions & 13 deletions subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
include { FASTQ_ALIGN_BWA } from '../fastq_align_bwa/main'
include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main'
include { FASTQ_ALIGN_BWA } from '../../nf-core/fastq_align_bwa/main'
include { PICARD_ADDORREPLACEREADGROUPS } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main'
include { PARABRICKS_FQ2BAM } from '../../../modules/nf-core/parabricks/fq2bam/main'
include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'

workflow FASTQ_ALIGN_DEDUP_BWAMEM {
Expand All @@ -12,6 +14,8 @@ workflow FASTQ_ALIGN_DEDUP_BWAMEM {
ch_bwamem_index // channel: [ val(meta), [ bwamem index ] ]
skip_deduplication // boolean: whether to deduplicate alignments
use_gpu // boolean: whether to use GPU or CPU for bwamem alignment
interval_file // channel: [ val(meta), [ interval file ] ]
known_sites // channel: [ val(meta), [ known sites ] ]

main:

Expand All @@ -23,18 +27,46 @@ workflow FASTQ_ALIGN_DEDUP_BWAMEM {
ch_multiqc_files = Channel.empty()
ch_versions = Channel.empty()

FASTQ_ALIGN_BWA (
ch_reads,
ch_bwamem_index,
true, // val_sort_bam hardcoded to true
ch_fasta
)
ch_alignment = ch_alignment.mix(FASTQ_ALIGN_BWA.out.bam)
ch_alignment_index = ch_alignment.mix(FASTQ_ALIGN_BWA.out.bai)
ch_stats = ch_alignment.mix(FASTQ_ALIGN_BWA.out.stats) // channel: [ val(meta), path(stats) ]
ch_flagstat = ch_alignment.mix(FASTQ_ALIGN_BWA.out.flagstat) // channel: [ val(meta), path(flagstat) ]
ch_idxstats = ch_alignment.mix(FASTQ_ALIGN_BWA.out.idxstats) // channel: [ val(meta), path(idxstats) ]
ch_versions = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first())
if (use_gpu) {
    /*
     * Align with the GPU-enabled Parabricks fq2bam implementation of bwa mem
     */
    PARABRICKS_FQ2BAM (
        ch_reads,
        ch_fasta,
        ch_bwamem_index,
        interval_file, // interval file
        known_sites,   // known sites
        'bam'          // output format
    )
    ch_alignment = PARABRICKS_FQ2BAM.out.bam
    ch_versions  = ch_versions.mix(PARABRICKS_FQ2BAM.out.versions.first())

    /*
     * Run the fq2bam BAM through BAM_SORT_STATS_SAMTOOLS so the GPU branch
     * populates the same bam/bai/stats/flagstat/idxstats channels as the CPU branch
     */
    BAM_SORT_STATS_SAMTOOLS (
        ch_alignment,
        ch_fasta
    )
    ch_alignment       = BAM_SORT_STATS_SAMTOOLS.out.bam
    ch_alignment_index = BAM_SORT_STATS_SAMTOOLS.out.bai
    ch_stats           = BAM_SORT_STATS_SAMTOOLS.out.stats    // channel: [ val(meta), path(stats) ]
    ch_flagstat        = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
    ch_idxstats        = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
    ch_versions        = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first())
}
else {
    /*
     * Align on CPU with the FASTQ_ALIGN_BWA subworkflow
     */
    FASTQ_ALIGN_BWA (
        ch_reads,
        ch_bwamem_index,
        true, // val_sort_bam hardcoded to true
        ch_fasta
    )
    // Assign each output to its own channel, mirroring the GPU branch.
    // Building these with `ch_alignment.mix(...)` would leak the BAM tuples
    // into the index and stats channels, because ch_alignment already
    // carries the BAMs at that point.
    ch_alignment       = FASTQ_ALIGN_BWA.out.bam
    ch_alignment_index = FASTQ_ALIGN_BWA.out.bai
    ch_stats           = FASTQ_ALIGN_BWA.out.stats    // channel: [ val(meta), path(stats) ]
    ch_flagstat        = FASTQ_ALIGN_BWA.out.flagstat // channel: [ val(meta), path(flagstat) ]
    ch_idxstats        = FASTQ_ALIGN_BWA.out.idxstats // channel: [ val(meta), path(idxstats) ]
    ch_versions        = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first())
}

if (!skip_deduplication) {
/*
Expand Down
18 changes: 17 additions & 1 deletion subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "fastq_align_dedup_bwamem"
description: Performs alignment of DNA or TAPS-treated reads using bwamem, sort and deduplicate
description: Performs alignment of DNA or TAPS-treated reads using bwamem or parabricks/fq2bam, sort and deduplicate
keywords:
- bwamem
- alignment
Expand All @@ -11,9 +11,11 @@ keywords:
- fastq
- bam
components:
- parabricks/fq2bam
- samtools/index
- picard/addorreplacereadgroups
- picard/markduplicates
- bam_sort_stats_samtools
- fastq_align_bwa
input:
- ch_reads:
Expand Down Expand Up @@ -41,6 +43,20 @@ input:
type: boolean
description: |
Skip deduplication of aligned reads
- use_gpu:
type: boolean
description: |
Use GPU (parabricks/fq2bam) instead of CPU (bwa mem) for alignment
- interval_file:
type: file
description: |
Interval file passed to parabricks/fq2bam (only used when use_gpu is true)
Structure: [ val(meta), path(interval file) ]
pattern: "*.{bed,intervals}"
- known_sites:
type: file
description: |
Known sites file passed to parabricks/fq2bam (only used when use_gpu is true)
Structure: [ val(meta), path(known sites) ]
pattern: "*.{vcf,vcf.gz}"
output:
- bam:
type: file
Expand Down
1 change: 0 additions & 1 deletion subworkflows/nf-core/fastq_align_dedup_bwamem/nextflow.config
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// IMPORTANT: This config file should be included to ensure that the subworkflow works properly.
process {
withName: 'SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.sorted" }
Expand Down
224 changes: 224 additions & 0 deletions subworkflows/nf-core/fastq_align_dedup_bwamem/tests/gpu.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
nextflow_workflow {

name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMEM"
script "../main.nf"
workflow "FASTQ_ALIGN_DEDUP_BWAMEM"
config "./nextflow.config"

tag "gpu"
tag "subworkflows"
tag "subworkflows_nfcore"
tag "subworkflows/fastq_align_dedup_bwamem"
tag "parabricks/fq2bam"
tag "samtools/index"
tag "picard/markduplicates"
tag "untar"

// Builds the BWA indices that the tests below feed into the subworkflow as input[3].
// NOTE(review): both runs index the methylseq test-dataset genome.fa, while the tests
// pass the sarscov2 genome.fasta as the reference (input[1]) — confirm the index and
// the reference are meant to come from different genomes.
setup {
// Index consumed by the single-end tests as BWA_INDEX.out.index.
run("BWA_INDEX") {
script "../../../../modules/nf-core/bwa/index/main.nf"
process {
"""
input[0] = Channel.of([
[ id:'test' ], // meta map
file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true)
])
"""
}
}

// Second run of the same module with identical input, aliased so the paired-end
// test can reference it as BWA_INDEX_PE.out.index.
run("BWA_INDEX", alias: 'BWA_INDEX_PE') {
script "../../../../modules/nf-core/bwa/index/main.nf"
process {
"""
input[0] = Channel.of([
[ id:'test' ], // meta map
file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true)
])
"""
}
}
}

// Single-end reads, GPU alignment (use_gpu = true), deduplication enabled.
test("Params: parabricks/fq2bam single-end | use_gpu ") {
    when {
        params {
            use_gpu    = true
            bwa_prefix = 'genome.fa'
        }
        workflow {
            """
            input[0] = Channel.of([
                [ id:'test', single_end:true ],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
            ])
            input[1] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
            ])
            input[2] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
            ])
            input[3] = BWA_INDEX.out.index
            input[4] = false // skip_deduplication
            input[5] = true  // use_gpu
            // Empty placeholder tuples rather than Channel.empty(): these values are wired
            // straight into PARABRICKS_FQ2BAM, and a process with an empty queue-channel
            // input is never executed, which would leave every output channel empty.
            input[6] = [ [], [] ] // interval_file
            input[7] = [ [], [] ] // known_sites
            """
        }
    }

    then {
        // Snapshot stable representations only: read MD5s for BAMs, file names for
        // index/metrics/multiqc files, full content for the samtools reports.
        // NOTE(review): regenerate the snapshot if it was recorded with empty inputs.
        assertAll(
            { assert workflow.success },
            { assert snapshot(
                workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() },
                workflow.out.bai.collect { meta, bai -> file(bai).name },
                workflow.out.samtools_flagstat,
                workflow.out.samtools_stats,
                workflow.out.samtools_index_stats,
                workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name },
                workflow.out.multiqc.flatten().collect { path -> file(path).name },
                workflow.out.versions
                ).match()
            }
        )
    }
}

// Single-end reads, GPU alignment (use_gpu = true), deduplication skipped.
test("Params: parabricks/fq2bam single-end | use_gpu | skip_deduplication") {
    when {
        params {
            skip_deduplication = true
            use_gpu            = true
            bwa_prefix         = 'genome.fa'
        }
        workflow {
            """
            input[0] = Channel.of([
                [ id:'test', single_end:true ],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
            ])
            input[1] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
            ])
            input[2] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
            ])
            input[3] = BWA_INDEX.out.index
            input[4] = true // skip_deduplication
            input[5] = true // use_gpu
            // Empty placeholder tuples rather than Channel.empty(): these values are wired
            // straight into PARABRICKS_FQ2BAM, and a process with an empty queue-channel
            // input is never executed, which would leave every output channel empty.
            input[6] = [ [], [] ] // interval_file
            input[7] = [ [], [] ] // known_sites
            """
        }
    }

    then {
        // Snapshot stable representations only: read MD5s for BAMs, file names for
        // index/metrics/multiqc files, full content for the samtools reports.
        // NOTE(review): regenerate the snapshot if it was recorded with empty inputs.
        assertAll(
            { assert workflow.success },
            { assert snapshot(
                workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() },
                workflow.out.bai.collect { meta, bai -> file(bai).name },
                workflow.out.samtools_flagstat,
                workflow.out.samtools_stats,
                workflow.out.samtools_index_stats,
                workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name },
                workflow.out.multiqc.flatten().collect { path -> file(path).name },
                workflow.out.versions
                ).match()
            }
        )
    }
}

// Single-end reads, GPU alignment, run with Nextflow's -stub option.
test("Params: parabricks/fq2bam single-end | use_gpu | stub") {
    options '-stub'
    when {

        workflow {
            """
            input[0] = Channel.of([
                [ id:'test', single_end:true ],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
            ])
            input[1] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
            ])
            input[2] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
            ])
            input[3] = BWA_INDEX.out.index
            input[4] = false // skip_deduplication
            input[5] = true  // use_gpu
            // Empty placeholder tuples rather than Channel.empty(): these values are wired
            // straight into PARABRICKS_FQ2BAM, and a process with an empty queue-channel
            // input is never executed, even in stub mode.
            input[6] = [ [], [] ] // interval_file
            input[7] = [ [], [] ] // known_sites
            """
        }
    }

    then {
        // The whole output map is snapshotted for the stub run.
        // NOTE(review): regenerate the snapshot if it was recorded with empty inputs.
        assertAll(
            { assert workflow.success },
            { assert snapshot(workflow.out).match() }
        )
    }
}

// Paired-end reads, GPU alignment (use_gpu = true), deduplication skipped.
test("Params: parabricks/fq2bam paired-end | use_gpu | skip_deduplication") {

    when {
        params {
            skip_deduplication = true
            use_gpu            = true
            bwa_prefix         = 'genome.fa'
        }
        workflow {
            """
            input[0] = Channel.of([
                [ id:'test', single_end:false ],
                [
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                ]
            ])
            input[1] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
            ])
            input[2] = Channel.of([
                [:],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
            ])
            input[3] = BWA_INDEX_PE.out.index
            input[4] = true // skip_deduplication
            input[5] = true // use_gpu
            // Empty placeholder tuples rather than Channel.empty(): these values are wired
            // straight into PARABRICKS_FQ2BAM, and a process with an empty queue-channel
            // input is never executed, which would leave every output channel empty.
            input[6] = [ [], [] ] // interval_file
            input[7] = [ [], [] ] // known_sites
            """
        }
    }

    then {
        // Snapshot stable representations only: read MD5s for BAMs, file names for
        // index/metrics/multiqc files, full content for the samtools reports.
        // NOTE(review): regenerate the snapshot if it was recorded with empty inputs.
        assertAll(
            { assert workflow.success },
            { assert snapshot(
                workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() },
                workflow.out.bai.collect { meta, bai -> file(bai).name },
                workflow.out.samtools_flagstat,
                workflow.out.samtools_stats,
                workflow.out.samtools_index_stats,
                workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name },
                workflow.out.multiqc.flatten().collect { path -> file(path).name },
                workflow.out.versions
                ).match()
            }
        )
    }
}
}
Loading
Loading