diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ea1be0..b809631 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c - [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group reports - [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. - [#49](https://github.com/nf-core/seqinspector/pull/49) Merge with template 3.0.2. +- [#47](https://github.com/nf-core/seqinspector/pull/47) Added kraken2 subworkflow - [#56](https://github.com/nf-core/seqinspector/pull/56) Added SeqFu stats module. - [#50](https://github.com/nf-core/seqinspector/pull/50) Add an optional subsampling step. - [#51](https://github.com/nf-core/seqinspector/pull/51) Add nf-test to CI. diff --git a/CITATIONS.md b/CITATIONS.md index 208cfa1..d222ab5 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -14,6 +14,14 @@ > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. +- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) + + > Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. https://doi.org/10.1186/s13059-019-1891-0 + +- [Krona](https://doi.org/10.1186/1471-2105-12-385) + + > Ondov, B. D., Bergman, N. H., & Phillippy, A. M. (2011). Interactive metagenomic visualization in a Web browser. BMC Bioinformatics, 12. https://doi.org/10.1186/1471-2105-12-385 + - [SeqFu](https://telatin.github.io/seqfu2/) > Telatin A, Fariselli P, Birolo G. SeqFu: A Suite of Utilities for the Robust and Reproducible Manipulation of Sequence Files. Bioengineering 2021, 8, 59. doi.org/10.3390/bioengineering8050059 diff --git a/conf/modules.config b/conf/modules.config index 4a653ed..2f10bfe 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -26,6 +26,14 @@ process { ext.args = '--quiet' } + withName: 'KRAKEN2_KRAKEN2' { + publishDir = [ + path: { "${params.outdir}/kraken2_reports" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'SEQFU_STATS' { ext.args = '' publishDir = [ @@ -35,6 +43,32 @@ process { ] } + withName: 'KRONA_KTUPDATETAXONOMY' { + publishDir = [ + path: { "${params.outdir}/kraken2_reports/krona_reports" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] + } + + withName: 'KRONA_KTIMPORTTAXONOMY' { + publishDir = [ + path: { "${params.outdir}/kraken2_reports/krona_reports" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'UNTAR' { + publishDir = [ + path: { "${params.outdir}/kraken2_db" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_uncompressed_k2db + ] + } + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/conf/test.config b/conf/test.config index 0ebc405..4960dcf 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,4 +30,8 @@ params { // Genome references genome = 'R64-1-1' + + // Kraken options + // Database information: https://github.com/nf-core/test-datasets/blob/taxprofiler/README.md#kraken2 + kraken2_db = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' } diff --git a/conf/test_full.config b/conf/test_full.config index 53b2288..30f9851 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,4 +21,8 @@ params { // Genome references genome = 'R64-1-1' + + // Kraken + // Database information: https://github.com/nf-core/test-datasets/blob/taxprofiler/README.md#kraken2 + kraken2_db = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' } diff --git a/docs/output.md b/docs/output.md index 3d3c349..b0c141d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,6 +12,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Seqtk](#seqtk) - Subsample a specific number of reads per sample - [FastQC](#fastqc) - Raw read QC +- [Kraken2](#kraken2) - Phylogenetic assignment of reads using k-mers +- [Krona](#krona) - Interactive visualization of Kraken2 results - [SeqFu Stats](#seqfu_stats) - Statistics for FASTA or FASTQ files - [FastQ Screen](#fastqscreen) - Mapping against a set of references for basic contamination QC - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline @@ -42,6 +44,35 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +### Kraken2 + +[Kraken](https://ccb.jhu.edu/software/kraken2/) is a taxonomic sequence classifier that assigns taxonomic labels to DNA sequences. Kraken examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps -mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. + +- `kraken2/` + - `.kraken2.report.txt`: A report containing information on the phylogenetic assignment of reads in a given sample. + - `/` + - `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample + - `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample + - `_.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample + + + +The main taxonomic classification file from Kraken2 is the `*report.txt` file. It gives you the most information for a single sample. +You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline. + +### Krona + +[Krona](https://github.com/marbl/Krona) allows the exploration of (metagenomic) hierarchical data with interactive zooming, multi-layered pie charts. + +Krona charts will be generated by the pipeline for supported tools (Kraken2, Centrifuge, Kaiju, and MALT) + +- `krona/` + - `_.html`: per-tool/per-database interactive HTML file containing hierarchical piecharts + + + +The resulting HTML files can be loaded into your web browser for exploration. Each file will have a dropdown to allow you to switch between each sample aligned against the given database of the tool. + ### FastQ Screen
diff --git a/modules.json b/modules.json index 28c9108..75fa278 100644 --- a/modules.json +++ b/modules.json @@ -20,6 +20,21 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "kraken2/kraken2": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "krona/ktimporttaxonomy": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "krona/ktupdatetaxonomy": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "fastqscreen/fastqscreen": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -36,6 +51,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "untar": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "seqtk/sample": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/kraken2/kraken2/environment.yml b/modules/nf-core/kraken2/kraken2/environment.yml new file mode 100644 index 0000000..ba776d3 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::kraken2=2.1.3" + - "coreutils=9.4" + - "pigz=2.8" diff --git a/modules/nf-core/kraken2/kraken2/main.nf b/modules/nf-core/kraken2/kraken2/main.nf new file mode 100644 index 0000000..364a6fe --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/main.nf @@ -0,0 +1,85 @@ +process KRAKEN2_KRAKEN2 { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' : + 'biocontainers/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' }" + + input: + tuple val(meta), path(reads) + path db + val save_output_fastqs + val save_reads_assignment + + output: + tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads.txt') , optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" + def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" + def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : "" + + """ + kraken2 \\ + --db $db \\ + --threads $task.cpus \\ + --report ${prefix}.kraken2.report.txt \\ + --gzip-compressed \\ + $unclassified_option \\ + $classified_option \\ + $readclassification_option \\ + $paired \\ + $args \\ + $reads + + $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? "${prefix}.classified.fastq.gz" : "${prefix}.classified_1.fastq.gz ${prefix}.classified_2.fastq.gz" + def unclassified = meta.single_end ? "${prefix}.unclassified.fastq.gz" : "${prefix}.unclassified_1.fastq.gz ${prefix}.unclassified_2.fastq.gz" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" + def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : "" + + """ + touch ${prefix}.kraken2.report.txt + if [ "$save_output_fastqs" == "true" ]; then + touch $classified + touch $unclassified + fi + if [ "$save_reads_assignment" == "true" ]; then + touch ${prefix}.kraken2.classifiedreads.txt + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/kraken2/kraken2/meta.yml b/modules/nf-core/kraken2/kraken2/meta.yml new file mode 100644 index 0000000..8693764 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/meta.yml @@ -0,0 +1,99 @@ +name: kraken2_kraken2 +description: Classifies metagenomic sequence data +keywords: + - classify + - metagenomics + - fastq + - db +tools: + - kraken2: + description: | + Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads + homepage: https://ccb.jhu.edu/software/kraken2/ + documentation: https://github.com/DerrickWood/kraken2/wiki/Manual + doi: 10.1186/s13059-019-1891-0 + licence: ["MIT"] + identifier: biotools:kraken2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - db: + type: directory + description: Kraken2 database + - - save_output_fastqs: + type: string + description: | + If true, optional commands are added to save classified and unclassified reads + as fastq files + - - save_reads_assignment: + type: string + description: | + If true, an optional command is added to save a file reporting the taxonomic + classification of each input read +output: + - classified_reads_fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.classified{.,_}*": + type: file + description: | + Reads classified as belonging to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - unclassified_reads_fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.unclassified{.,_}*": + type: file + description: | + Reads not classified to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - classified_reads_assignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*classifiedreads.txt": + type: file + description: | + Kraken2 output file indicating the taxonomic assignment of + each input read + - report: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*report.txt": + type: file + description: | + Kraken2 report containing stats about classified + and not classifed reads. + pattern: "*.{report.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/kraken2/kraken2/tests/main.nf.test b/modules/nf-core/kraken2/kraken2/tests/main.nf.test new file mode 100644 index 0000000..c0843df --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + name "Test Process KRAKEN2_KRAKEN2" + script "../main.nf" + process "KRAKEN2_KRAKEN2" + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "kraken2" + tag "kraken2/kraken2" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = Channel.of([ + [], + file( + params.modules_testdata_base_path + "genomics/sarscov2/genome/db/kraken2.tar.gz", + checkIfExists: true + ) + ]) + """ + } + } + } + + test("sarscov2 illumina single end [fastq]") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file( + params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", + checkIfExists: true + )] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.versions, + ).match() + }, + { assert process.out.classified_reads_fastq.get(0).get(1) ==~ ".*/test.classified.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1) ==~ ".*/test.unclassified.fastq.gz" }, + ) + } + } + + test("sarscov2 illumina paired end [fastq]") { + when { + params { + outdir = "$outputDir" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file( + params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", + checkIfExists: true + ), + file( + params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", + checkIfExists: true + ) + + ] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.versions, + ).match() + }, + { assert process.out.classified_reads_fastq.get(0).get(1).get(0) + ==~ ".*/test.classified_1.fastq.gz" }, + { assert process.out.classified_reads_fastq.get(0).get(1).get(1) + ==~ ".*/test.classified_2.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1).get(0) + ==~ ".*/test.unclassified_1.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1).get(1) + ==~ ".*/test.unclassified_2.fastq.gz" }, + ) + } + } + + test("sarscov2 illumina single end [fastq] + save_reads_assignment") { + when { + params { + outdir = "$outputDir" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file( + params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", + checkIfExists: true + )] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.classified_reads_assignment, + process.out.versions, + ).match() + }, + ) + } + } +} diff --git a/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap b/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap new file mode 100644 index 0000000..b432f87 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "sarscov2 illumina single end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:03.745692" + }, + "sarscov2 illumina paired end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:13.75649" + }, + "sarscov2 illumina single end [fastq] + save_reads_assignment": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.classifiedreads.txt:md5,e7a90531f0d8d777316515c36fe4cae0" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:22.459465" + } +} \ No newline at end of file diff --git a/modules/nf-core/kraken2/kraken2/tests/tags.yml b/modules/nf-core/kraken2/kraken2/tests/tags.yml new file mode 100644 index 0000000..9ebfd7a --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/tags.yml @@ -0,0 +1,3 @@ +kraken2/kraken2: + - modules/nf-core/kraken2/kraken2/** + - modules/nf-core/untar/** diff --git a/modules/nf-core/krona/ktimporttaxonomy/environment.yml b/modules/nf-core/krona/ktimporttaxonomy/environment.yml new file mode 100644 index 0000000..342c589 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::krona=2.8.1 diff --git a/modules/nf-core/krona/ktimporttaxonomy/main.nf b/modules/nf-core/krona/ktimporttaxonomy/main.nf new file mode 100644 index 0000000..16f3cd9 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/main.nf @@ -0,0 +1,54 @@ +process KRONA_KTIMPORTTAXONOMY { + tag "${meta.id}" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1': + 'biocontainers/krona:2.8.1--pl5321hdfd78af_1' }" + + input: + tuple val(meta), path(report) + path taxonomy, stageAs: 'taxonomy.tab' + + output: + tuple val(meta), path ('*.html'), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.8.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + TAXONOMY=\$(find -L . -name '*.tab' -exec dirname {} \\;) + echo \$TAXONOMY + + ktImportTaxonomy \\ + $args \\ + -o ${prefix}.html \\ + -tax \$TAXONOMY/ \\ + $report + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.8.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/meta.yml b/modules/nf-core/krona/ktimporttaxonomy/meta.yml new file mode 100644 index 0000000..87f8478 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/meta.yml @@ -0,0 +1,57 @@ +name: krona_ktimporttaxonomy +description: KronaTools Import Taxonomy imports taxonomy classifications and produces + an interactive Krona plot. +keywords: + - plot + - taxonomy + - interactive + - html + - visualisation + - krona chart +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several + Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html + doi: 10.1186/1471-2105-12-385 + identifier: biotools:krona +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - report: + type: file + description: "A tab-delimited file with taxonomy IDs and (optionally) query + IDs, magnitudes, and scores. Query IDs are taken from column 1, taxonomy + IDs from column 2, and scores from column 3. Lines beginning with # will + be ignored." + pattern: "*.{tsv}" + - - taxonomy: + type: file + description: | + Path to a Krona taxonomy .tab file normally downloaded and generated by + krona/ktUpdateTaxonomy. Custom taxonomy files can have any name, but + must end in `.tab`. + pattern: "*tab" +output: + - html: + - meta: + type: file + description: A html file containing an interactive krona plot. + pattern: "*.{html}" + - "*.html": + type: file + description: A html file containing an interactive krona plot. + pattern: "*.{html}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test new file mode 100644 index 0000000..1068f30 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process KRONA_KTIMPORTTAXONOMY" + script "../main.nf" + process "KRONA_KTIMPORTTAXONOMY" + + tag "modules" + tag "modules_nfcore" + tag "krona" + tag "krona/ktimporttaxonomy" + + test ("sarscov2 - metagenome - kraken report") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/krona_taxonomy.tab', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( process.out.versions ).match() }, + { assert file(process.out.html.get(0).get(1)).exists() } + ) + } + } + + test ("sarscov2 - metagenome - kraken report - stub") { + + options '-stub' + when { + process { + """ + input[0] = Channel.of([ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/krona_taxonomy.tab', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test.snap b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test.snap new file mode 100644 index 0000000..61fba86 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "sarscov2 - metagenome - kraken report": { + "content": [ + [ + "versions.yml:md5,59fc89b6db8fad0aa9aa06f7437a18a7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-08-08T10:34:23.760055" + }, + "sarscov2 - metagenome - kraken report - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,59fc89b6db8fad0aa9aa06f7437a18a7" + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,59fc89b6db8fad0aa9aa06f7437a18a7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-08-08T10:34:29.695251" + } +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/tests/tags.yml b/modules/nf-core/krona/ktimporttaxonomy/tests/tags.yml new file mode 100644 index 0000000..1112970 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/tests/tags.yml @@ -0,0 +1,2 @@ +krona/ktimporttaxonomy: + - "modules/nf-core/krona/ktimporttaxonomy/**" diff --git a/modules/nf-core/krona/ktupdatetaxonomy/environment.yml b/modules/nf-core/krona/ktupdatetaxonomy/environment.yml new file mode 100644 index 0000000..cb06934 --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::krona=2.7.1 diff --git a/modules/nf-core/krona/ktupdatetaxonomy/main.nf b/modules/nf-core/krona/ktupdatetaxonomy/main.nf new file mode 100644 index 0000000..2586f9c --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/main.nf @@ -0,0 +1,44 @@ +def VERSION='2.7.1' // Version information not provided by tool on CLI + +process KRONA_KTUPDATETAXONOMY { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krona:2.7.1--pl526_5' : + 'biocontainers/krona:2.7.1--pl526_5' }" + + input: + + output: + path 'taxonomy/taxonomy.tab', emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ktUpdateTaxonomy.sh \\ + $args \\ + taxonomy/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ + + stub: + """ + mkdir taxonomy + + touch taxonomy/taxonomy.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/ktupdatetaxonomy/meta.yml b/modules/nf-core/krona/ktupdatetaxonomy/meta.yml new file mode 100644 index 0000000..d7b5e80 --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/meta.yml @@ -0,0 +1,32 @@ +name: krona_ktupdatetaxonomy +description: KronaTools Update Taxonomy downloads a taxonomy database +keywords: + - database + - taxonomy + - krona + - visualisation +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several + Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: https://github.com/marbl/Krona/wiki/Installing + doi: + 10.1186/1471-2105-12-385 + # There is no input. This module downloads a pre-built taxonomy database for use with Krona Tools. + identifier: biotools:krona +output: + - db: + - taxonomy/taxonomy.tab: + type: file + description: A TAB separated file that contains a taxonomy database. + pattern: "*.{tab}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test new file mode 100644 index 0000000..672e82d --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test @@ -0,0 +1,54 @@ + +nextflow_process { + + name "Test Process KRONA_KTUPDATETAXONOMY" + script "../main.nf" + process "KRONA_KTUPDATETAXONOMY" + + tag "modules" + tag "modules_nfcore" + tag "krona" + tag "krona/ktupdatetaxonomy" + + test("test-krona-ktupdatetaxonomy") { + + when { + process { + """ + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.db[0]).name, //unstable + process.out.versions + ).match() + } + ) + } + } + + test("test-krona-ktupdatetaxonomy-stub") { + options '-stub' + + when { + process { + """ + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test.snap b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test.snap new file mode 100644 index 0000000..fba9392 --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "test-krona-ktupdatetaxonomy": { + "content": [ + "taxonomy.tab", + [ + "versions.yml:md5,a0e095fdd3ba80fcc62188c4c1f38ff7" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-06T20:17:37.154632" + }, + "test-krona-ktupdatetaxonomy-stub": { + "content": [ + { + "0": [ + "taxonomy.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,a0e095fdd3ba80fcc62188c4c1f38ff7" + ], + "db": [ + "taxonomy.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,a0e095fdd3ba80fcc62188c4c1f38ff7" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T20:15:46.959212" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000..c779485 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 0000000..9bd8f55 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 0000000..290346b --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,49 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - untar: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - $prefix: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 0000000..c957517 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 0000000..ceb91b7 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 0000000..feb6f15 --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/nextflow.config b/nextflow.config index bb1cb27..05e3bce 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,12 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false + // Kraken2 options + kraken2_db = null + kraken2_save_reads = false + kraken2_save_readclassifications = false + save_uncompressed_k2db = false + // Fastqscreen options fastq_screen_references = './assets/example_fastq_screen_references.csv' diff --git a/nextflow_schema.json b/nextflow_schema.json index 248660b..2c9bd98 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -48,6 +48,18 @@ "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" }, + "kraken2_save_reads": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Turn on saving of Kraken2-aligned reads", + "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - kraken2: `--classified-out` and `--unclassified-out`" + }, + "kraken2_save_readclassifications": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Turn on saving of Kraken2 per-read taxonomic assignment file", + "help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on specific taxonomic taxonomic assignment that that read recieved.\n\n> Modifies tool parameter(s):\n> - kraken2: `--output`" + }, "skip_tools": { "type": "string", "description": "Comma-separated string of tools to skip", @@ -84,6 +96,13 @@ "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." }, + "kraken2_db": { + "type": "string", + "exists": true, + "mimetype": "text/plain", + "description": "Path to Kraken2 database file, either a gzipped file or the path to the uncompressed database.", + "fa_icon": "fas fa-database" + }, "igenomes_base": { "type": "string", "format": "directory-path", @@ -259,5 +278,10 @@ { "$ref": "#/$defs/generic_options" } - ] + ], + "properties": { + "save_uncompressed_k2db": { + "type": "boolean" + } + } } diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf new file mode 100644 index 0000000..67c8bb2 --- /dev/null +++ b/subworkflows/local/phylogenetic_qc.nf @@ -0,0 +1,53 @@ +// +// Seqinspector Phylogenetic classification of reads, to check for contamination and adjacent issues +// + +include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/untar/main.nf' +include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main.nf' +include { KRONA_KTUPDATETAXONOMY } from '../../modules/nf-core/krona/ktupdatetaxonomy/main.nf' +include { KRONA_KTIMPORTTAXONOMY } from '../../modules/nf-core/krona/ktimporttaxonomy/main.nf' + +workflow PHYLOGENETIC_QC{ + take: + reads + + main: + ch_reads = reads + ch_versions = Channel.empty() + // + // MODULE: Untar kraken2_db or read it as it is if not compressed + // + if (params.kraken2_db.endsWith('.gz')) { + UNTAR_KRAKEN2_DB ( [ [:], params.kraken2_db ]) + ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) + } else { + ch_kraken2_db = Channel.fromPath(params.kraken2_db, checkIfExists: true) + ch_kraken2_db = ch_kraken2_db.collect() + } + + // + // MODULE: Perform kraken2 + // + KRAKEN2_KRAKEN2 ( + ch_reads, + ch_kraken2_db, + params.kraken2_save_reads, + params.kraken2_save_readclassifications + ) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions) + + // + // MODULE: krona plot the kraken2 reports + // + KRONA_KTUPDATETAXONOMY() + KRONA_KTIMPORTTAXONOMY ( + KRAKEN2_KRAKEN2.out.report, + KRONA_KTUPDATETAXONOMY.out.db + ) + + emit: + versions = ch_versions + mqc = KRAKEN2_KRAKEN2.out.report + krona_plots = KRONA_KTIMPORTTAXONOMY.out.html.collect() +} diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 71bf40c..baeee8e 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -228,4 +228,4 @@ }, "timestamp": "2025-02-03T16:40:27.8664" } -} \ No newline at end of file +} diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index 6838297..47e147e 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -589,4 +589,4 @@ }, "timestamp": "2025-02-04T10:10:13.268943" } -} \ No newline at end of file +} diff --git a/tests/NovaSeq6000.main_subsample.nf.test.snap b/tests/NovaSeq6000.main_subsample.nf.test.snap index 3ba5046..b669ead 100644 --- a/tests/NovaSeq6000.main_subsample.nf.test.snap +++ b/tests/NovaSeq6000.main_subsample.nf.test.snap @@ -28,4 +28,4 @@ }, "timestamp": "2024-12-05T15:17:50.525033752" } -} \ No newline at end of file +} diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index c256a37..e77f2c9 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -140,4 +140,4 @@ }, "timestamp": "2025-02-04T10:12:21.446251" } -} \ No newline at end of file +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index fc9b8f7..0e45875 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -14,11 +14,12 @@ include { FASTQSCREEN_FASTQSCREEN } from '../modules/nf-core/fastqscreen/f include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' +include { PHYLOGENETIC_QC } from '../subworkflows/local/phylogenetic_qc' + include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -103,11 +104,21 @@ workflow SEQINSPECTOR { ch_versions = ch_versions.mix(FASTQSCREEN_FASTQSCREEN.out.versions.first()) } + // + // SUBWORKFLOW: Run kraken2 and produce krona plots + // + PHYLOGENETIC_QC ( + ch_samplesheet + ) + ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.mqc) + ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions.first()) + + // // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile( + .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'seqinspector_software_' + 'mqc_' + 'versions.yml', sort: true,