diff --git a/CHANGELOG.md b/CHANGELOG.md index d7eea13..e92aa43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,8 +21,10 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c - [#53](https://github.com/nf-core/seqinspector/pull/53) Add FastQ-Screen database multiplexing and limit scope of nf-test in CI. - [#96](https://github.com/nf-core/seqinspector/pull/96) Added missing citations to citation tool - [#103](https://github.com/nf-core/seqinspector/pull/103) Configure full-tests +- [#109](https://github.com/nf-core/seqinspector/pull/109) Adds ToulligQC module for long read QC - [#110](https://github.com/nf-core/seqinspector/pull/110) Update input schema to accept either tar file or directory as rundir, and fastq messages and patterns. + ### `Fixed` - [#71](https://github.com/nf-core/seqinspector/pull/71) FASTQSCREEN does not fail when multiple reads are provided. diff --git a/CITATIONS.md b/CITATIONS.md index 208cfa1..b477943 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -28,6 +28,8 @@ - [Seqtk](https://github.com/lh3/seqtk) +- [ToulligQC](https://github.com/GenomiqueENS/toulligQC) + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e960d07..40b5fdd 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -2,6 +2,7 @@ report_comment: > This report has been generated by the nf-core/seqinspector analysis pipeline. For information about how to interpret these results, please see the documentation. + If ToulligQC was used, a separate report is available in the results folder. 
report_section_order: "nf-core-seqinspector-methods-description": order: -1000 diff --git a/conf/modules.config b/conf/modules.config index 5a0b7cc..d5e8abb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -35,6 +35,15 @@ process { ] } + withName: 'TOULLIGQC' { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/toulligqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/docs/output.md b/docs/output.md index 3d3c349..2da9539 100644 --- a/docs/output.md +++ b/docs/output.md @@ -80,6 +80,17 @@ The `.csv` is provided as a pipeline parameter `fastq_screen_references` and is [SeqFu](https://telatin.github.io/seqfu2/) is general-purpose program to manipulate and parse information from FASTA/FASTQ files, supporting gzipped input files. Includes functions to interleave and de-interleave FASTQ files, to rename sequences and to count and print statistics on sequence lengths. In this pipeline, the `seqfu stats` module is used to produce general quality metrics statistics. +### ToulligQC + +
+Output files + +- `toulligqc/` + - `*.data`: ToulligQC output text file containing log information and all analysis results + - `*.html`: ToulligQC html report file + +[ToulligQC](https://github.com/GenomiqueENS/toulligQC) is dedicated to the QC analyses of Oxford Nanopore runs. This software is written in Python and developed by the GenomiqueENS core facility of the Institute of Biology of the Ecole Normale Superieure (IBENS). + ### MultiQC nf-core/seqinspector will generate the following MultiQC reports: diff --git a/modules.json b/modules.json index 28c9108..36305dd 100644 --- a/modules.json +++ b/modules.json @@ -40,6 +40,11 @@ "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] + }, + "toulligqc": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/toulligqc/environment.yml b/modules/nf-core/toulligqc/environment.yml new file mode 100644 index 0000000..e1632a8 --- /dev/null +++ b/modules/nf-core/toulligqc/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::toulligqc=2.5.6 diff --git a/modules/nf-core/toulligqc/main.nf b/modules/nf-core/toulligqc/main.nf new file mode 100644 index 0000000..71ced04 --- /dev/null +++ b/modules/nf-core/toulligqc/main.nf @@ -0,0 +1,63 @@ +process TOULLIGQC { + label 'process_low' + tag "$meta.id" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/toulligqc:2.5.6--pyhdfd78af_0': + 'biocontainers/toulligqc:2.5.6--pyhdfd78af_0' }" + + input: + + tuple val(meta), path(ontfile) + + + output: + tuple val(meta), path("*/*.data") , emit: report_data + tuple val(meta), path("*/*.html") , emit: report_html, optional: true + tuple val(meta), path("*/images/*.html") , emit: plots_html + tuple val(meta), path("*/images/plotly.min.js") , emit: plotly_js + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def input_file = ("$ontfile".endsWith(".fastq") || "$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq") || "$ontfile".endsWith(".fq.gz")) ? "--fastq ${ontfile}" : + ("$ontfile".endsWith(".txt") || "$ontfile".endsWith(".txt.gz")) ? "--sequencing-summary-source ${ontfile}" : + ("$ontfile".endsWith(".bam")) ? "--bam ${ontfile}" : '' + + """ + toulligqc \\ + $input_file \\ + --output-directory ${prefix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + toulligqc: \$(toulligqc --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + mkdir ${prefix}/images + touch ${prefix}/report.data + touch ${prefix}/images/Correlation_between_read_length_and_PHRED_score.html + touch ${prefix}/images/Distribution_of_read_lengths.html + touch ${prefix}/images/PHRED_score_density_distribution.html + touch ${prefix}/images/Read_count_histogram.html + touch ${prefix}/images/plotly.min.js + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + toulligqc: \$(toulligqc --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/toulligqc/meta.yml b/modules/nf-core/toulligqc/meta.yml new file mode 100644 index 0000000..b269ecc --- /dev/null +++ b/modules/nf-core/toulligqc/meta.yml @@ -0,0 +1,76 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "toulligqc" +description: "A post sequencing QC tool for Oxford Nanopore sequencers" +keywords: + - nanopore sequencing + - quality control + - genomics +tools: + - "toulligqc": + description: "A post sequencing QC tool for Oxford Nanopore sequencers" + homepage: https://github.com/GenomiqueENS/toulligQC + documentation: https://github.com/GenomiqueENS/toulligQC + tool_dev_url: https://github.com/GenomiqueENS/toulligQC + licence: ["CECILL-2.1"] + identifier: biotools:ToulligQC + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ontfile: + type: file + description: Input ONT file + pattern: "*.{fastq,fastq.gz,fq,fq.gz,txt,txt.gz,bam}" +output: + - report_data: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/*.data": + type: file + description: Report data emitted from toulligqc + pattern: "*.data" + - report_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/*.html": + type: file + description: Report data in html format + pattern: "*.html" + - plots_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/images/*.html": + type: file + description: Plots emitted in html format + pattern: "*.html" + - plotly_js: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*/images/plotly.min.js": + type: file + description: Plots emitted from toulligqc + pattern: "plotly.min.js" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Salome-Brunon" +maintainers: + - "@Salome-Brunon" diff --git a/modules/nf-core/toulligqc/tests/main.nf.test b/modules/nf-core/toulligqc/tests/main.nf.test new file mode 100644 index 0000000..5bbad94 --- /dev/null +++ b/modules/nf-core/toulligqc/tests/main.nf.test @@ -0,0 +1,125 @@ +nextflow_process { + + name "Test Process TOULLIGQC" + script "../main.nf" + process "TOULLIGQC" + + tag "modules" + tag "modules_nfcore" + tag "toulligqc" + + test("sarscov2 - nanopore sequencing_summary") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/sequencing_summary/test2.sequencing_summary.txt', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("toulligqc_versions_sequencing_summary") }, + { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..74]).match() }, + { assert process.out.report_html[0][1] ==~ ".*/report.html"} + ) + } + + } + + test("sarscov2 - nanopore sequencing_summary + barcodes") { + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/sequencing_summary/test2.sequencing_summary.txt', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("toulligqc_versions_sequencing_summary_barcodes") }, + { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..74]).match() }, + { assert process.out.report_html[0][1] ==~ ".*/report.html"} + ) + } + + } + + test("sarscov2 - 
nanopore fastq") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("toulligqc_versions_fastq") }, + { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..67]).match() }, + { assert process.out.report_html[0][1] ==~ ".*/report.html"} + ) + } + + } + + test("sarscov2 - nanopore bam") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/bam/test.sorted.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("toulligqc_versions_bam") }, + { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..74]).match() }, + { assert process.out.report_html[0][1] ==~ ".*/report.html"} + ) + } + + } + test("sarscov2 - nanopore bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/bam/test.sorted.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("toulligqc_versions_stub") } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/toulligqc/tests/main.nf.test.snap b/modules/nf-core/toulligqc/tests/main.nf.test.snap new file mode 100644 index 0000000..025c3c2 --- /dev/null +++ b/modules/nf-core/toulligqc/tests/main.nf.test.snap @@ -0,0 +1,319 @@ +{ + "sarscov2 - nanopore sequencing_summary": { + "content": [ + [ + "sequencing.telemetry.extractor.software.analysis=1d_basecalling", + "basecaller.sequencing.summary.1d.extractor.read.count=100", + 
"basecaller.sequencing.summary.1d.extractor.read.pass.count=100", + "basecaller.sequencing.summary.1d.extractor.read.fail.count=0", + "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=1.0", + "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.0", + "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100", + "basecaller.sequencing.summary.1d.extractor.read.pass.frequency=100.0", + "basecaller.sequencing.summary.1d.extractor.read.fail.frequency=0.0", + "basecaller.sequencing.summary.1d.extractor.yield=38253", + "basecaller.sequencing.summary.1d.extractor.n50=365", + "basecaller.sequencing.summary.1d.extractor.l50=67", + "basecaller.sequencing.summary.1d.extractor.run.time=29006.4915", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.count=85.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.mean=1.1764705882352942", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.std=0.4412231534591759", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.min=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.25%=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.50%=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.75%=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.max=3.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53", + "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.3747982818722", + "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75", + "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5", + "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0", + 
"basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=382.53", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=219.3747982818722", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=271.75", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=305.5", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=nan", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.084663391113281", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.7714887857437134", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=7.7340922355651855", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.089608192443848", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=11.957954406738281", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=13.238139390945435", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=17.272123336791992", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=12.084663391113281", + 
"basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.7714887857437134", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=7.7340922355651855", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.089608192443848", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=11.957954406738281", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=13.238139390945435", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=17.272123336791992", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.mean=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.std=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.min=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.25%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.50%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.75%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.max=nan" + ] + ], + "timestamp": "2024-05-02T16:00:29.481355" + }, + "toulligqc_versions_sequencing_summary_barcodes": { + "content": [ + [ + "versions.yml:md5,3be42e94e756b5a89167a891d287c538" + ] + ], + "timestamp": "2024-05-02T16:00:41.607538" + }, + "sarscov2 - nanopore sequencing_summary + barcodes": { + "content": [ + [ + "toulligqc.info.extractor.duration=0.0", + "sequencing.telemetry.extractor.software.analysis=1d_basecalling", + "basecaller.sequencing.summary.1d.extractor.read.count=100", + "basecaller.sequencing.summary.1d.extractor.read.pass.count=100", + "basecaller.sequencing.summary.1d.extractor.read.fail.count=0", + "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=1.0", + "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.0", + "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100", + 
"basecaller.sequencing.summary.1d.extractor.read.pass.frequency=100.0", + "basecaller.sequencing.summary.1d.extractor.read.fail.frequency=0.0", + "basecaller.sequencing.summary.1d.extractor.yield=38253", + "basecaller.sequencing.summary.1d.extractor.n50=365", + "basecaller.sequencing.summary.1d.extractor.l50=67", + "basecaller.sequencing.summary.1d.extractor.run.time=29006.4915", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.count=85.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.mean=1.1764705882352942", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.std=0.4412231534591759", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.min=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.25%=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.50%=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.75%=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.max=3.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53", + "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.3747982818722", + "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75", + "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5", + "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=382.53", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=219.3747982818722", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0", + 
"basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=271.75", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=305.5", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=nan", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.084663391113281", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.7714887857437134", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=7.7340922355651855", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.089608192443848", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=11.957954406738281", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=13.238139390945435", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=17.272123336791992", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=12.084663391113281", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.7714887857437134", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=7.7340922355651855", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.089608192443848", + 
"basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=11.957954406738281", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=13.238139390945435", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=17.272123336791992", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.mean=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.std=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.min=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.25%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.50%=nan", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.75%=nan" + ] + ], + "timestamp": "2024-05-02T16:00:41.63299" + }, + "toulligqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,3be42e94e756b5a89167a891d287c538" + ] + ], + "timestamp": "2024-05-02T16:01:05.074867" + }, + "sarscov2 - nanopore bam": { + "content": [ + [ + "sequencing.telemetry.extractor.run.id=Unknown", + "sequencing.telemetry.extractor.sample.id=Unknown", + "sequencing.telemetry.extractor.model.file=Unknown", + "sequencing.telemetry.extractor.software.name=minimap2", + "sequencing.telemetry.extractor.software.version=2.17-r974-dirty", + "sequencing.telemetry.extractor.flowcell.id=Unknown", + "sequencing.telemetry.extractor.basecalling.date=Unknown", + "sequencing.telemetry.extractor.pass.threshold.qscore=9", + "basecaller.sequencing.summary.1d.extractor.read.count=100", + "basecaller.sequencing.summary.1d.extractor.read.pass.count=96", + "basecaller.sequencing.summary.1d.extractor.read.fail.count=4", + "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=0.96", + "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.04", + "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100", + "basecaller.sequencing.summary.1d.extractor.read.pass.frequency=96.0", + 
"basecaller.sequencing.summary.1d.extractor.read.fail.frequency=4.0", + "basecaller.sequencing.summary.1d.extractor.yield=38253", + "basecaller.sequencing.summary.1d.extractor.n50=365", + "basecaller.sequencing.summary.1d.extractor.l50=67", + "basecaller.sequencing.summary.1d.extractor.run.time=99.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.count=1.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.mean=100.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.std=nan", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.min=100.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.25%=100.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.50%=100.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.75%=100.0", + "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.max=100.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53", + "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.37479828187222", + "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75", + "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5", + "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=385.6145833333333", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=222.9568931568171", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=272.0", + 
"basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=312.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=308.5", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=79.87698875980074", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=261.0", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=267.75", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=272.5", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=313.25", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=428.0", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.82710075378418", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.9102991819381714", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=8.199999809265137", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.702500104904175", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=12.714999675750732", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=14.082499980926514", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=18.110000610351562", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=13.002917289733887", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.737709403038025", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=9.84000015258789", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.90749979019165", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=12.820000171661377", + 
"basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=14.09500002861023", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=18.110000610351562" + ] + ], + "timestamp": "2024-05-02T16:01:05.091838" + }, + "toulligqc_versions_stub": { + "content": [ + [ + "versions.yml:md5,3be42e94e756b5a89167a891d287c538" + ] + ], + "timestamp": "2024-05-02T16:01:12.876406" + }, + "toulligqc_versions_fastq": { + "content": [ + [ + "versions.yml:md5,3be42e94e756b5a89167a891d287c538" + ] + ], + "timestamp": "2024-05-02T16:00:52.810853" + }, + "toulligqc_versions_sequencing_summary": { + "content": [ + [ + "versions.yml:md5,3be42e94e756b5a89167a891d287c538" + ] + ], + "timestamp": "2024-05-02T16:00:29.458805" + }, + "sarscov2 - nanopore fastq": { + "content": [ + [ + "sequencing.telemetry.extractor.run.id=Unknow", + "sequencing.telemetry.extractor.sample.id=Unknow", + "sequencing.telemetry.extractor.model.file=Unknow", + "basecaller.sequencing.summary.1d.extractor.read.count=100", + "basecaller.sequencing.summary.1d.extractor.read.pass.count=96", + "basecaller.sequencing.summary.1d.extractor.read.fail.count=4", + "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=0.96", + "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.04", + "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100", + "basecaller.sequencing.summary.1d.extractor.read.pass.frequency=96.0", + "basecaller.sequencing.summary.1d.extractor.read.fail.frequency=4.0", + "basecaller.sequencing.summary.1d.extractor.yield=38253", + "basecaller.sequencing.summary.1d.extractor.n50=365", + "basecaller.sequencing.summary.1d.extractor.l50=67", + "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53", + "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.37479828187222", + "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0", + 
"basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75", + "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5", + "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0", + "basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=385.6145833333333", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=222.9568931568171", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=272.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=312.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0", + "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=308.5", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=79.87698875980074", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=261.0", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=267.75", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=272.5", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=313.25", + "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=428.0", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.82710075378418", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.9102991819381714", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=8.199999809265137", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.702500104904175", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=12.714999675750732", + 
"basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=14.082499980926514", + "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=18.110000610351562", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=13.002917289733887", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.737709403038025", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=9.84000015258789", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.90749979019165", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=12.820000171661377", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=14.09500002861023", + "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=18.110000610351562", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.mean=8.607500076293945", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.std=0.27219802141189575", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.min=8.199999809265137", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.25%=8.59000015258789", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.50%=8.735000133514404", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.75%=8.752500057220459", + "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.max=8.760000228881836" + ] + ], + "timestamp": "2024-05-02T16:00:52.831534" + } +} \ No newline at end of file diff --git a/modules/nf-core/toulligqc/tests/nextflow.config b/modules/nf-core/toulligqc/tests/nextflow.config new file mode 100644 index 0000000..3e494e8 --- /dev/null +++ b/modules/nf-core/toulligqc/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + withName: TOULLIGQC { + //Optional barcoding option + //Comma separated barcode list (with no spaces). (e.g. 
BC05,RB09,NB01,barcode10) + ext.args = '--barcoding --barcodes barcode01,barcode02,barcode03,barcode04,barcode05,barcode06,barcode07,barcode08,barcode09,barcode10,barcode11,barcode12' + } + +} diff --git a/modules/nf-core/toulligqc/tests/tags.yml b/modules/nf-core/toulligqc/tests/tags.yml new file mode 100644 index 0000000..8814a4d --- /dev/null +++ b/modules/nf-core/toulligqc/tests/tags.yml @@ -0,0 +1,2 @@ +toulligqc: + - "modules/nf-core/toulligqc/**" diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 22d37c3..f9a77e8 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -10,6 +10,7 @@ include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/sample/ include { FASTQC } from '../modules/nf-core/fastqc/main' include { SEQFU_STATS } from '../modules/nf-core/seqfu/stats' include { FASTQSCREEN_FASTQSCREEN } from '../modules/nf-core/fastqscreen/fastqscreen/main' +include { TOULLIGQC } from '../modules/nf-core/toulligqc/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' @@ -106,6 +107,20 @@ workflow SEQINSPECTOR { ch_versions = ch_versions.mix(FASTQSCREEN_FASTQSCREEN.out.versions.first()) } + // + // MODULE: Run ToulligQC + // + + // This provides useful stats of long reads + + if (!("toulligqc" in skip_tools)) { + TOULLIGQC ( + ch_samplesheet + ) + ch_multiqc_files = ch_multiqc_files.mix(TOULLIGQC.out.report_data) + ch_versions = ch_versions.mix(TOULLIGQC.out.versions.first()) + } + // // Collate and save software versions //