From 6dc45ac897a9457d00a2bd632d694336f0b40e26 Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Mon, 28 Oct 2024 11:45:21 +0100 Subject: [PATCH 1/4] add module bbmap/clumpify --- modules.json | 5 ++ .../nf-core/bbmap/clumpify/environment.yml | 5 ++ modules/nf-core/bbmap/clumpify/main.nf | 38 ++++++++++ modules/nf-core/bbmap/clumpify/meta.yml | 56 +++++++++++++++ .../nf-core/bbmap/clumpify/tests/main.nf.test | 72 +++++++++++++++++++ .../bbmap/clumpify/tests/main.nf.test.snap | 49 +++++++++++++ workflows/seqinspector.nf | 12 ++++ 7 files changed, 237 insertions(+) create mode 100644 modules/nf-core/bbmap/clumpify/environment.yml create mode 100644 modules/nf-core/bbmap/clumpify/main.nf create mode 100644 modules/nf-core/bbmap/clumpify/meta.yml create mode 100644 modules/nf-core/bbmap/clumpify/tests/main.nf.test create mode 100644 modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 8e632d5..bf8f5f0 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/clumpify": { + "branch": "master", + "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/bbmap/clumpify/environment.yml b/modules/nf-core/bbmap/clumpify/environment.yml new file mode 100644 index 0000000..a2f6550 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.10 diff --git a/modules/nf-core/bbmap/clumpify/main.nf b/modules/nf-core/bbmap/clumpify/main.nf new file mode 100644 index 0000000..fc6a85a --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/main.nf @@ -0,0 +1,38 @@ +process BBMAP_CLUMPIFY { + tag "$meta.id" + label 'process_single' + label 'process_high_memory' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:39.10--h92535d8_0': + 'biocontainers/bbmap:39.10--h92535d8_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def raw = meta.single_end ? "in=$reads" : "in1=${reads[0]} in2=${reads[1]}" + def clumped = meta.single_end ? "out=${prefix}.clumped.fastq.gz" : "out1=${prefix}_1.clumped.fastq.gz out2=${prefix}_2.clumped.fastq.gz" + """ + clumpify.sh \\ + $raw \\ + $clumped \\ + $args \\ + &> ${prefix}.clumpify.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bbmap/clumpify/meta.yml b/modules/nf-core/bbmap/clumpify/meta.yml new file mode 100644 index 0000000..7db435c --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/meta.yml @@ -0,0 +1,56 @@ +name: bbmap_clumpify +description: Create 30% Smaller, Faster Gzipped Fastq Files. And remove duplicates +keywords: + - clumping fastqs + - smaller fastqs + - deduping + - fastq +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic + tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/ + documentation: https://www.biostars.org/p/225338/ + licence: ["UC-LBL license (see package)"] + identifier: biotools:bbmap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: The reordered/clumped (and if necessary deduped) fastq reads + pattern: "*.clumped.fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Clumpify log file + pattern: "*clumpify.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tamuanand" +maintainers: + - "@tamuanand" diff --git a/modules/nf-core/bbmap/clumpify/tests/main.nf.test b/modules/nf-core/bbmap/clumpify/tests/main.nf.test new file mode 100644 index 0000000..f43b876 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/tests/main.nf.test @@ -0,0 +1,72 @@ + +nextflow_process { + + name "Test Process BBMAP_CLUMPIFY" + script "../main.nf" + process "BBMAP_CLUMPIFY" + + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/clumpify" + + test("test-bbmap-clumpify-single-end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-bbmap-clumpify-paired-end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap b/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap new file mode 100644 index 0000000..e84c345 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "test-bbmap-clumpify-paired-end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.clumped.fastq.gz:md5,27e51643262c1ef3905c4be184c3814c", + "test_2.clumped.fastq.gz:md5,c70ab7bbd44d6b6fadd6a1a79ef1648f" + ] + ] + ], + "test.clumpify.log", + [ + "versions.yml:md5,fdf0404f694fca43bcf9be6458d927cd" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:16:59.10822554" + }, + "test-bbmap-clumpify-single-end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.clumped.fastq.gz:md5,27e51643262c1ef3905c4be184c3814c" + ] + ], + "test.clumpify.log", + [ + "versions.yml:md5,fdf0404f694fca43bcf9be6458d927cd" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:16:36.9005326" + } +} \ No newline at end of file diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index ea62811..4b63365 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,6 +4,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { FASTQC } from '../modules/nf-core/fastqc/main' +include { BBMAP_CLUMPIFY } from '../modules/nf-core/bbmap/clumpify/main' + include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' @@ -39,6 +41,16 @@ workflow SEQINSPECTOR { ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = 
ch_versions.mix(FASTQC.out.versions.first()) + // + // MODULE: Run BBMAP_CLUMPIFY (clumpify.sh) on the samplesheet reads + // + BBMAP_CLUMPIFY ( + ch_samplesheet + ) +// ch_multiqc_files = ch_multiqc_files.mix(BBMAP_CLUMPIFY.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions.first()) + + // // Collate and save software versions // From 445edd1bf793005dbdf774d2da67ecd50f611b33 Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Mon, 28 Oct 2024 15:28:37 +0100 Subject: [PATCH 2/4] cleanup code --- workflows/seqinspector.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 4b63365..59f19e5 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -47,7 +47,6 @@ workflow SEQINSPECTOR { BBMAP_CLUMPIFY ( ch_samplesheet ) -// ch_multiqc_files = ch_multiqc_files.mix(BBMAP_CLUMPIFY.out.zip.collect{it[1]}) ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions.first()) From ba4bac0b66a39306cb94212efa519f531e210942 Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Mon, 28 Oct 2024 16:58:01 +0100 Subject: [PATCH 3/4] fix lint errors --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index c883822..8d85bdc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,4 +1,4 @@ -/* +/ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From b0e25588ad9a42eea02d25efdd3e9f8c150eeaec Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Tue, 29 Oct 2024 11:41:14 +0100 Subject: [PATCH 4/4] fix comment --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 8d85bdc..c883822 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,4 +1,4 @@ -/ +/* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~