From be3ec58f3bcc5bc044e18c4290949ae7a5ec2deb Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 24 Mar 2025 16:55:45 +0000 Subject: [PATCH 01/18] wip, need container with pyyaml --- modules/local/rundirparser/environment.yml | 7 ++ modules/local/rundirparser/main.nf | 69 ++++++++++++++++++ modules/local/rundirparser/meta.yml | 68 +++++++++++++++++ modules/local/rundirparser/rundirparser.py | 27 +++++++ modules/local/rundirparser/tests/main.nf.test | 73 +++++++++++++++++++ workflows/seqinspector.nf | 14 ++++ 6 files changed, 258 insertions(+) create mode 100644 modules/local/rundirparser/environment.yml create mode 100644 modules/local/rundirparser/main.nf create mode 100644 modules/local/rundirparser/meta.yml create mode 100644 modules/local/rundirparser/rundirparser.py create mode 100644 modules/local/rundirparser/tests/main.nf.test diff --git a/modules/local/rundirparser/environment.yml b/modules/local/rundirparser/environment.yml new file mode 100644 index 0000000..4b3c9d3 --- /dev/null +++ b/modules/local/rundirparser/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "YOUR-TOOL-HERE" diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf new file mode 100644 index 0000000..751640c --- /dev/null +++ b/modules/local/rundirparser/main.nf @@ -0,0 +1,69 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. 
+// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process RUNDIRPARSER { + tag "$rundir.baseName" + label 'process_single' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "python=3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.12' : + 'python:3.12' }" + + input: + path(rundir) + path(parser_script) + + output: + path("*_mqc.yaml"), emit: yaml + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${rundir.baseName}" + """ + python ${parser_script} ${rundir} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rundirparser: \$(python --version |& sed '1!d ; s/Python //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${rundir.baseName}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch rundir_metadata.yml + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rundirparser: "stub_version" + END_VERSIONS + """ +} diff --git a/modules/local/rundirparser/meta.yml b/modules/local/rundirparser/meta.yml new file mode 100644 index 0000000..7027a80 --- /dev/null +++ b/modules/local/rundirparser/meta.yml @@ -0,0 +1,68 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "rundirparser" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort + - example + - genomics +tools: + - "rundirparser": + ## TODO nf-core: Add a description and other details for the software below + description: "" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: + identifier: + +## TODO nf-core: Add a description 
of all of the variables used as input +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + +## TODO nf-core: Add a description of all of the variables used as output +output: + - bam: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + ## TODO nf-core: Delete / customise this example output + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@kedhammar" +maintainers: + - "@kedhammar" diff --git a/modules/local/rundirparser/rundirparser.py b/modules/local/rundirparser/rundirparser.py new file mode 100644 index 0000000..f5056cd --- /dev/null +++ b/modules/local/rundirparser/rundirparser.py @@ -0,0 +1,27 @@ +# rundirparser.py +import sys +import yaml + + +def parse_rundir(rundir): + # Dummy implementation, replace with actual logic + return_dict = { + "rundir": rundir, + "samples": [ + {"sample_id": "SAMPLE_1", "metric_1": 10, "metric_2": 20}, + {"sample_id": "SAMPLE_2", "metric_1": 15, "metric_2": 25}, + {"sample_id": "SAMPLE_3", "metric_1": 20, "metric_2": 30}, + ], + } + return return_dict + + +def main(): + rundir = sys.argv[1] + metadata = parse_rundir(rundir) + with open(f"{rundir}_mqc.yml", "w") as outfile: + 
yaml.dump(metadata, outfile, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/local/rundirparser/tests/main.nf.test b/modules/local/rundirparser/tests/main.nf.test new file mode 100644 index 0000000..c1e8440 --- /dev/null +++ b/modules/local/rundirparser/tests/main.nf.test @@ -0,0 +1,73 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test rundirparser +nextflow_process { + + name "Test Process RUNDIRPARSER" + script "../main.nf" + process "RUNDIRPARSER" + + tag "modules" + tag "modules_" + tag "rundirparser" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. 
Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + ) + } + + } + +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 9488cea..dade251 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -10,6 +10,7 @@ include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/sample/ include { FASTQC } from '../modules/nf-core/fastqc/main' include { SEQFU_STATS } from '../modules/nf-core/seqfu/stats' include { FASTQSCREEN_FASTQSCREEN } from '../modules/nf-core/fastqscreen/fastqscreen/main' +include { RUNDIRPARSER } from '../modules/local/rundirparser/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' @@ -38,6 +39,19 @@ workflow SEQINSPECTOR { ch_multiqc_extra_files = Channel.empty() ch_multiqc_reports = Channel.empty() + // + // MODULE: Parse rundir info + // + if (!("rundirparser" in skip_tools)) { + + ch_rundir = ch_samplesheet + .map { meta, _reads -> meta.rundir } + .distinct() + .view() + + RUNDIRPARSER(ch_rundir, "${projectDir}/modules/local/rundirparser/rundirparser.py") + } + // // MODULE: Run Seqtk sample to perform subsampling // From 80caa982eca7b37117424bff3a44d0514e70e565 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 08:40:27 +0000 Subject: [PATCH 02/18] fix conda env and functional containers --- modules/local/rundirparser/environment.yml | 6 +++--- modules/local/rundirparser/main.nf | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/local/rundirparser/environment.yml b/modules/local/rundirparser/environment.yml index 
4b3c9d3..046f88d 100644 --- a/modules/local/rundirparser/environment.yml +++ b/modules/local/rundirparser/environment.yml @@ -1,7 +1,7 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - "YOUR-TOOL-HERE" + - pip + - pip: + - PyYAML==6.0.2 diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 751640c..81a1fe4 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -23,17 +23,17 @@ process RUNDIRPARSER { // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "python=3.12" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.12' : - 'python:3.12' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ad/ad2bcce70756f81c07c7e2ffd9b66213bf48ace786466395ac3a402840df2ffb/data' : + 'community.wave.seqera.io/library/pip_pyyaml:c2ecf27a7f63796e' }" input: path(rundir) path(parser_script) output: - path("*_mqc.yaml"), emit: yaml + path("*_mqc.yml"), emit: yaml path "versions.yml", emit: versions when: From 7144a475aa0587bc62d090462b1e4e964a2aa457 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 09:47:30 +0000 Subject: [PATCH 03/18] spacing --- modules/nf-core/seqfu/stats/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/seqfu/stats/main.nf b/modules/nf-core/seqfu/stats/main.nf index 0f8bb3e..2ea8e43 100644 --- a/modules/nf-core/seqfu/stats/main.nf +++ b/modules/nf-core/seqfu/stats/main.nf @@ -14,7 +14,7 @@ process SEQFU_STATS { output: tuple val(meta), path("*.tsv") , emit: stats - tuple val(meta), path("*_mqc.txt"), emit: multiqc + tuple val(meta), path("*_mqc.txt"), emit: multiqc path "versions.yml" , emit: versions when: From aec2ad351b1cf86a812851b7e1b9b39068a839ce Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 09:47:54 +0000 Subject: [PATCH 04/18] channel magic to refactor meta on rundir-level --- modules/local/rundirparser/main.nf | 6 +++--- workflows/seqinspector.nf | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 81a1fe4..de31d5e 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -29,12 +29,12 @@ process RUNDIRPARSER { 'community.wave.seqera.io/library/pip_pyyaml:c2ecf27a7f63796e' }" input: - path(rundir) + tuple val(joint_meta), path(rundir) path(parser_script) output: - path("*_mqc.yml"), emit: yaml - path "versions.yml", emit: versions + tuple val(joint_meta), 
path("*_mqc.yml"), emit: multiqc + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index dade251..5f3e4e6 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -45,11 +45,22 @@ workflow SEQINSPECTOR { if (!("rundirparser" in skip_tools)) { ch_rundir = ch_samplesheet - .map { meta, _reads -> meta.rundir } - .distinct() - .view() + // Group by rundir to merge tags from samples with the same rundir + .map { meta, _reads -> [meta.rundir, meta] } + .groupTuple() + // Create a new meta object with merged tags for each unique rundir + .map { rundir, metas -> + // Collect all tags across all samples with this rundir + def all_tags = metas.collect { it.tags }.flatten().unique() + // Create a new meta object with the merged tags + def new_meta = [tags: all_tags] + // Return the new structure + [new_meta, rundir] + } RUNDIRPARSER(ch_rundir, "${projectDir}/modules/local/rundirparser/rundirparser.py") + ch_multiqc_files = ch_multiqc_files.mix(RUNDIRPARSER.out.multiqc.first()) + ch_versions = ch_versions.mix(RUNDIRPARSER.out.versions.first()) } // From 9b0f0e7bd4304fa0844a9723c57e8fd17abc9567 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 10:02:12 +0000 Subject: [PATCH 05/18] update process versioning and stubs --- modules/local/rundirparser/main.nf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index de31d5e..3d6ea9e 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -47,7 +47,8 @@ process RUNDIRPARSER { cat <<-END_VERSIONS > versions.yml "${task.process}": - rundirparser: \$(python --version |& sed '1!d ; s/Python //') + Python: \$(python --version |& sed '1!d ; s/Python //') + PyYAML: \$(python -c "import yaml; print(yaml.__version__)") END_VERSIONS """ @@ -59,11 +60,12 @@ process RUNDIRPARSER { 
// Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ - touch rundir_metadata.yml + touch rundir_mqc.yml cat <<-END_VERSIONS > versions.yml "${task.process}": - rundirparser: "stub_version" + Python: stub_version + PyYAML: stub_version END_VERSIONS """ } From 915965becbb34247dbec76954990bed17d7b5db2 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 11:11:32 +0000 Subject: [PATCH 06/18] remove todo:s, make process agnostic to mqc file extensions --- modules/local/rundirparser/main.nf | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 3d6ea9e..3c9206a 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -1,28 +1,7 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process RUNDIRPARSER { tag "$rundir.baseName" label 'process_single' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ad/ad2bcce70756f81c07c7e2ffd9b66213bf48ace786466395ac3a402840df2ffb/data' : @@ -33,8 +12,8 @@ process RUNDIRPARSER { path(parser_script) output: - tuple val(joint_meta), path("*_mqc.yml"), emit: multiqc - path "versions.yml", emit: versions + tuple val(joint_meta), path("*_mqc.*"), emit: multiqc + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -60,7 +39,7 @@ process RUNDIRPARSER { // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ - touch rundir_mqc.yml + touch rundir_mqc.txt cat <<-END_VERSIONS > versions.yml "${task.process}": From 5546c1069db9cf295166b34201a1cceb84e92647 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 11:11:56 +0000 Subject: [PATCH 07/18] generate _mqc.txt dummy file with table from rundir keys --- 
modules/local/rundirparser/rundirparser.py | 60 +++++++++++++++++----- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/modules/local/rundirparser/rundirparser.py b/modules/local/rundirparser/rundirparser.py index f5056cd..0820d4e 100644 --- a/modules/local/rundirparser/rundirparser.py +++ b/modules/local/rundirparser/rundirparser.py @@ -5,22 +5,58 @@ def parse_rundir(rundir): # Dummy implementation, replace with actual logic - return_dict = { - "rundir": rundir, - "samples": [ - {"sample_id": "SAMPLE_1", "metric_1": 10, "metric_2": 20}, - {"sample_id": "SAMPLE_2", "metric_1": 15, "metric_2": 25}, - {"sample_id": "SAMPLE_3", "metric_1": 20, "metric_2": 30}, - ], - } - return return_dict + + yml_contents = """# plot_type: 'table' +# section_name: 'rundir stats' +# description: 'dummy rundir stats' +# pconfig: +# namespace: 'Cust Data' +# headers: +# col1: +# title: '#Seqs' +# description: 'Number of sequences' +# format: '{:,.0f}' +# col2: +# title: 'Total bp' +# description: 'Total size of the dataset' +# col3: +# title: 'Avg' +# description: 'Average sequence length' +# col4: +# title: 'N50' +# description: '50% of the sequences are longer than this size' +# col5: +# title: 'N75' +# description: '75% of the sequences are longer than this size' +# col6: +# title: 'N90' +# description: '90% of the sequences are longer than this size' +# col7: +# title: 'Min' +# description: 'Length of the shortest sequence' +# col8: +# title: 'Max' +# description: 'Length of the longest sequence' +# col9: +# title: 'auN' +# description: 'Area under the Nx curve' +# col10: +# title: 'GC' +# description: 'Relative GC content (excluding Ns)' +""" + tsv_contents = f"""Sample col1 col2 col3 col4 col5 col6 col7 col8 col9 col10 +{rundir} 10 147806 14780.6000000 22507 16573 15322 22801.9181765 344 33340 NaN +""" + + contents = yml_contents + tsv_contents + + with open(f"{rundir}_mqc.txt", "w") as f: + f.write(contents) def main(): rundir = sys.argv[1] - metadata = 
parse_rundir(rundir) - with open(f"{rundir}_mqc.yml", "w") as outfile: - yaml.dump(metadata, outfile, default_flow_style=False) + parse_rundir(rundir) if __name__ == "__main__": From c87f4766c93586fe0d9cdb415de30e23b384a9e4 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 11:12:19 +0000 Subject: [PATCH 08/18] clean up formatting and underscore unused variables --- workflows/seqinspector.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 5f3e4e6..c26f9a9 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -150,14 +150,14 @@ workflow SEQINSPECTOR { Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap( + summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value( + ch_workflow_summary = Channel.value( paramsSummaryMultiqc(summary_params)) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( + ch_methods_description = Channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_extra_files = ch_multiqc_extra_files.mix( @@ -172,7 +172,7 @@ workflow SEQINSPECTOR { MULTIQC_GLOBAL ( ch_multiqc_files - .map { meta, file -> file } + .map { _meta, file -> file } .mix(ch_multiqc_extra_files) .collect(), ch_multiqc_config.toList(), @@ -183,7 +183,7 @@ workflow SEQINSPECTOR { ) ch_tags = ch_multiqc_files - .map { meta, sample -> meta.tags } + .map { meta, _sample -> meta.tags } .flatten() .unique() @@ -193,13 +193,13 @@ workflow SEQINSPECTOR { // Group samples by tag tagged_mqc_files = ch_tags .combine(ch_multiqc_files) - .filter { sample_tag, meta, sample -> sample_tag in meta.tags } - .map { sample_tag, meta, sample -> [sample_tag, sample] } + .filter { sample_tag, meta, _sample -> sample_tag in meta.tags } + .map { sample_tag, _meta, sample -> [sample_tag, sample] } .mix(multiqc_extra_files_per_tag) .groupTuple() .tap { mqc_by_tag } .collectFile { - sample_tag, samples -> + sample_tag, _samples -> def prefix_tag = "[TAG:${sample_tag}]" [ "${prefix_tag}_multiqc_extra_config.yml", @@ -212,7 +212,7 @@ workflow SEQINSPECTOR { } .map { file -> [ (file =~ /\[TAG:(.+)\]/)[0][1], file ] } .join(mqc_by_tag) - .multiMap { sample_tag, config, samples -> + .multiMap { _sample_tag, config, samples -> samples_per_tag: samples config: config } From 4d3a70ff709cc1d76b917c980dbc7f87f9b9d3ac Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 11:52:45 +0000 Subject: [PATCH 09/18] use module binary instead of supplying script as process input --- modules/local/rundirparser/main.nf | 3 +-- .../local/rundirparser/{ => resources/usr/bin}/rundirparser.py | 2 +- nextflow.config | 3 +++ workflows/seqinspector.nf | 2 +- 4 files changed, 6 insertions(+), 4 
deletions(-) rename modules/local/rundirparser/{ => resources/usr/bin}/rundirparser.py (98%) mode change 100644 => 100755 diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 3c9206a..86591fb 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -9,7 +9,6 @@ process RUNDIRPARSER { input: tuple val(joint_meta), path(rundir) - path(parser_script) output: tuple val(joint_meta), path("*_mqc.*"), emit: multiqc @@ -22,7 +21,7 @@ process RUNDIRPARSER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${rundir.baseName}" """ - python ${parser_script} ${rundir} + rundirparser.py ${rundir} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/rundirparser/rundirparser.py b/modules/local/rundirparser/resources/usr/bin/rundirparser.py old mode 100644 new mode 100755 similarity index 98% rename from modules/local/rundirparser/rundirparser.py rename to modules/local/rundirparser/resources/usr/bin/rundirparser.py index 0820d4e..6a965f9 --- a/modules/local/rundirparser/rundirparser.py +++ b/modules/local/rundirparser/resources/usr/bin/rundirparser.py @@ -1,4 +1,4 @@ -# rundirparser.py +#!/usr/bin/env python import sys import yaml diff --git a/nextflow.config b/nextflow.config index bb1cb27..caefca9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,6 +6,9 @@ ---------------------------------------------------------------------------------------- */ +// Enable use of module binaries for e.g. 
module specific Python scripts +nextflow.enable.moduleBinaries = true + // Global default params, used in configs params { diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index c26f9a9..ea26529 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -58,7 +58,7 @@ workflow SEQINSPECTOR { [new_meta, rundir] } - RUNDIRPARSER(ch_rundir, "${projectDir}/modules/local/rundirparser/rundirparser.py") + RUNDIRPARSER( ch_rundir ) ch_multiqc_files = ch_multiqc_files.mix(RUNDIRPARSER.out.multiqc.first()) ch_versions = ch_versions.mix(RUNDIRPARSER.out.versions.first()) } From 137629a798fc2fcf728e6527f287d4ac1cd32a30 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 14:23:10 +0000 Subject: [PATCH 10/18] add all multiqc files to channel, not just first --- workflows/seqinspector.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index ea26529..d0af814 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -59,7 +59,7 @@ workflow SEQINSPECTOR { } RUNDIRPARSER( ch_rundir ) - ch_multiqc_files = ch_multiqc_files.mix(RUNDIRPARSER.out.multiqc.first()) + ch_multiqc_files = ch_multiqc_files.mix(RUNDIRPARSER.out.multiqc) ch_versions = ch_versions.mix(RUNDIRPARSER.out.versions.first()) } From 82d285a495bba34e99cbec9774c0b61ec6a454f7 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 25 Mar 2025 15:02:34 +0000 Subject: [PATCH 11/18] extend suffix to differentiate from other _mqc files --- modules/local/rundirparser/main.nf | 2 +- modules/local/rundirparser/resources/usr/bin/rundirparser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 86591fb..8c18b8a 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -11,7 +11,7 @@ process RUNDIRPARSER { tuple val(joint_meta), path(rundir) output: - tuple 
val(joint_meta), path("*_mqc.*"), emit: multiqc + tuple val(joint_meta), path("*_rundir_mqc.*"), emit: multiqc path "versions.yml", emit: versions when: diff --git a/modules/local/rundirparser/resources/usr/bin/rundirparser.py b/modules/local/rundirparser/resources/usr/bin/rundirparser.py index 6a965f9..9e08887 100755 --- a/modules/local/rundirparser/resources/usr/bin/rundirparser.py +++ b/modules/local/rundirparser/resources/usr/bin/rundirparser.py @@ -50,7 +50,7 @@ def parse_rundir(rundir): contents = yml_contents + tsv_contents - with open(f"{rundir}_mqc.txt", "w") as f: + with open(f"{rundir}_rundir_mqc.txt", "w") as f: f.write(contents) From 70cacab7eb29a361f01387c7ce40c376e7463dbe Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Wed, 26 Mar 2025 08:49:46 +0000 Subject: [PATCH 12/18] give mqc outputs the same name so they will be merged automatically in the multiqc report generation, make function take output name as cmd parameter --- modules/local/rundirparser/main.nf | 16 ++++++---------- .../resources/usr/bin/rundirparser.py | 7 ++++--- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 8c18b8a..fe12323 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -11,17 +11,17 @@ process RUNDIRPARSER { tuple val(joint_meta), path(rundir) output: - tuple val(joint_meta), path("*_rundir_mqc.*"), emit: multiqc + tuple val(joint_meta), path("*_mqc.*"), emit: multiqc path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${rundir.baseName}" + // def args = task.ext.args ?: '' + // def prefix = task.ext.prefix ?: "${rundir.baseName}" """ - rundirparser.py ${rundir} + rundirparser.py ${rundir} ${moduleDir.baseName}_mqc.txt cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -31,12 +31,8 @@ process RUNDIRPARSER { """ stub: - def args = 
task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${rundir.baseName}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + // def args = task.ext.args ?: '' + // def prefix = task.ext.prefix ?: "${rundir.baseName}" """ touch rundir_mqc.txt diff --git a/modules/local/rundirparser/resources/usr/bin/rundirparser.py b/modules/local/rundirparser/resources/usr/bin/rundirparser.py index 9e08887..68e91df 100755 --- a/modules/local/rundirparser/resources/usr/bin/rundirparser.py +++ b/modules/local/rundirparser/resources/usr/bin/rundirparser.py @@ -3,7 +3,7 @@ import yaml -def parse_rundir(rundir): +def parse_rundir(rundir, outname): # Dummy implementation, replace with actual logic yml_contents = """# plot_type: 'table' @@ -50,13 +50,14 @@ def parse_rundir(rundir): contents = yml_contents + tsv_contents - with open(f"{rundir}_rundir_mqc.txt", "w") as f: + with open(outname, "w") as f: f.write(contents) def main(): rundir = sys.argv[1] - parse_rundir(rundir) + outname = sys.argv[2] + parse_rundir(rundir, outname) if __name__ == "__main__": From f0999e8b778f5ac51169489bff9ae6860a68b08a Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Wed, 26 Mar 2025 09:07:48 +0000 Subject: [PATCH 13/18] actually, let the script differentiate between seq platforms and name the outfile accordingly, for proper downstream grouping --- modules/local/rundirparser/main.nf | 2 +- .../rundirparser/resources/usr/bin/rundirparser.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 
fe12323..bc96977 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -21,7 +21,7 @@ process RUNDIRPARSER { // def args = task.ext.args ?: '' // def prefix = task.ext.prefix ?: "${rundir.baseName}" """ - rundirparser.py ${rundir} ${moduleDir.baseName}_mqc.txt + rundirparser.py ${rundir} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/rundirparser/resources/usr/bin/rundirparser.py b/modules/local/rundirparser/resources/usr/bin/rundirparser.py index 68e91df..b8db68d 100755 --- a/modules/local/rundirparser/resources/usr/bin/rundirparser.py +++ b/modules/local/rundirparser/resources/usr/bin/rundirparser.py @@ -3,9 +3,11 @@ import yaml -def parse_rundir(rundir, outname): +def parse_rundir(rundir): # Dummy implementation, replace with actual logic + sequencing_platform = None + yml_contents = """# plot_type: 'table' # section_name: 'rundir stats' # description: 'dummy rundir stats' @@ -50,14 +52,20 @@ def parse_rundir(rundir, outname): contents = yml_contents + tsv_contents + """ + File names should be unique between sequencing platforms, but otherwise identical + so multiple rundirs of the same platform will be written to the same table + in the MultiQC report. 
+ """ + outname = f"{sequencing_platform or 'rundirparser'}_mqc.txt" + with open(outname, "w") as f: f.write(contents) def main(): rundir = sys.argv[1] - outname = sys.argv[2] - parse_rundir(rundir, outname) + parse_rundir(rundir) if __name__ == "__main__": From e38676ded0ec046888c9957415f5cd74531cc625 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 26 Mar 2025 10:36:55 +0100 Subject: [PATCH 14/18] editorconfig --- modules/local/rundirparser/tests/main.nf.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/rundirparser/tests/main.nf.test b/modules/local/rundirparser/tests/main.nf.test index c1e8440..0dc321d 100644 --- a/modules/local/rundirparser/tests/main.nf.test +++ b/modules/local/rundirparser/tests/main.nf.test @@ -22,7 +22,7 @@ nextflow_process { process { """ // TODO nf-core: define inputs of the process here. Example: - + input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), @@ -51,7 +51,7 @@ nextflow_process { process { """ // TODO nf-core: define inputs of the process here. Example: - + input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), From 7873d53d7b1db287f38ea6fbd4e2a221724175ba Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Wed, 26 Mar 2025 13:16:28 +0000 Subject: [PATCH 15/18] fix quotes and make rundirparser publish results with individual names for test consistency --- conf/modules.config | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 4a653ed..60e7895 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,11 +18,11 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - withName: SEQTK_SAMPLE { + withName: 'SEQTK_SAMPLE' { ext.args = '-s100' } - withName: FASTQC { + withName: 'FASTQC' { ext.args = '--quiet' } @@ -35,6 +35,14 @@ process { ] } + withName: 'RUNDIRPARSER' { + publishDir = [ + path: { "${params.outdir}/rundirparser" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${dir_meta.dirname}_$filename" } + ] + } + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ From d45ee52aa7f4d3c56541039e1c7eecf1b88f62f4 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Wed, 26 Mar 2025 13:17:09 +0000 Subject: [PATCH 16/18] clean up meta object names --- modules/local/rundirparser/main.nf | 10 +++------- workflows/seqinspector.nf | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index bc96977..95d9719 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -1,5 +1,5 @@ process RUNDIRPARSER { - tag "$rundir.baseName" + tag "$rundir.simpleName" label 'process_single' conda "${moduleDir}/environment.yml" @@ -8,18 +8,16 @@ process RUNDIRPARSER { 'community.wave.seqera.io/library/pip_pyyaml:c2ecf27a7f63796e' }" input: - tuple val(joint_meta), path(rundir) + tuple val(dir_meta), path(rundir) output: - tuple val(joint_meta), path("*_mqc.*"), emit: multiqc + tuple val(dir_meta), path("*_mqc.*"), emit: multiqc path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - // def args = task.ext.args ?: '' - // def prefix = task.ext.prefix ?: "${rundir.baseName}" """ rundirparser.py ${rundir} @@ -31,8 +29,6 @@ process RUNDIRPARSER { """ stub: - // def args = task.ext.args ?: '' - // def prefix = task.ext.prefix ?: "${rundir.baseName}" """ touch rundir_mqc.txt diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 
9f94c11..ff92239 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -53,9 +53,9 @@ workflow SEQINSPECTOR { // Collect all tags across all samples with this rundir def all_tags = metas.collect { it.tags }.flatten().unique() // Create a new meta object with the merged tags - def new_meta = [tags: all_tags] + def dir_meta = [tags: all_tags, dirname: rundir.simpleName] // Return the new structure - [new_meta, rundir] + [dir_meta, rundir] } RUNDIRPARSER( ch_rundir ) From 9c66ff89b1aa83603bc082073215ff020ef506bb Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Wed, 26 Mar 2025 13:37:21 +0000 Subject: [PATCH 17/18] docs and formatting --- conf/modules.config | 3 +++ modules/local/rundirparser/main.nf | 2 +- workflows/seqinspector.nf | 17 ++++++++++++----- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 60e7895..9327925 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -39,6 +39,9 @@ process { publishDir = [ path: { "${params.outdir}/rundirparser" }, mode: params.publish_dir_mode, + // The process _mqc.txt outputs should have identical names for the same sequencing platforms + // in order to be grouped together in the MultiQC report, but here we need to enforce uniqueness + // to avoid overwriting results in the publishDir. saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${dir_meta.dirname}_$filename" } ] } diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 95d9719..335acef 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -12,7 +12,7 @@ process RUNDIRPARSER { output: tuple val(dir_meta), path("*_mqc.*"), emit: multiqc - path "versions.yml", emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index ff92239..0472ab3 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -44,21 +44,28 @@ workflow SEQINSPECTOR { // if (!("rundirparser" in skip_tools)) { + // From samplesheet channel serving (sampleMetaObj, sampleReadsPath) tuples: + // --> Create new rundir channel serving (rundirMetaObj, rundirPath) tuples ch_rundir = ch_samplesheet - // Group by rundir to merge tags from samples with the same rundir + // Group by rundir .map { meta, _reads -> [meta.rundir, meta] } .groupTuple() - // Create a new meta object with merged tags for each unique rundir + // From all meta objects associated with a given rundir... .map { rundir, metas -> - // Collect all tags across all samples with this rundir + // Collect all unique tags into a list def all_tags = metas.collect { it.tags }.flatten().unique() - // Create a new meta object with the merged tags + // Create a new meta object whose attributes are... + // 1. tags: The list of merged tags, used for grouping MultiQC reports + // 2. dirname: The simple name of the rundir, used for setting unique output names in publishDir def dir_meta = [tags: all_tags, dirname: rundir.simpleName] - // Return the new structure + // Return the new structure, to... + // 1. Feed into rundir specific processes + // 2. 
Mix with the ch_multiqc_files channel downstream [dir_meta, rundir] } RUNDIRPARSER( ch_rundir ) + ch_multiqc_files = ch_multiqc_files.mix(RUNDIRPARSER.out.multiqc) ch_versions = ch_versions.mix(RUNDIRPARSER.out.versions.first()) } From fabdac5825980f03fb30b9f695b758d8ffcb93fd Mon Sep 17 00:00:00 2001 From: matrulda Date: Wed, 26 Mar 2025 16:53:08 +0100 Subject: [PATCH 18/18] Add parse_illumina.py --- modules/local/rundirparser/main.nf | 6 +- .../resources/usr/bin/parse_illumina.py | 95 +++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) create mode 100755 modules/local/rundirparser/resources/usr/bin/parse_illumina.py diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf index 335acef..c702e57 100644 --- a/modules/local/rundirparser/main.nf +++ b/modules/local/rundirparser/main.nf @@ -4,8 +4,8 @@ process RUNDIRPARSER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ad/ad2bcce70756f81c07c7e2ffd9b66213bf48ace786466395ac3a402840df2ffb/data' :
-        'community.wave.seqera.io/library/pip_pyyaml:c2ecf27a7f63796e' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/41/412df2cdcf04e0a12971ba61b12cacaa5a49705442afe99ad96668bebbb8f880/data' :
+        'community.wave.seqera.io/library/pip_pyyaml_xmltodict:a4e48bd1ab4b6a53' }"
 
     input:
     tuple val(dir_meta), path(rundir)
@@ -19,7 +19,9 @@ process RUNDIRPARSER {
 
     script:
     """
+    # TODO: check what kind of seq platfrom to decide which script to use
     rundirparser.py ${rundir}
+    parse_illumina.py ${rundir}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/rundirparser/resources/usr/bin/parse_illumina.py b/modules/local/rundirparser/resources/usr/bin/parse_illumina.py
new file mode 100755
index 0000000..5d210ab
--- /dev/null
+++ b/modules/local/rundirparser/resources/usr/bin/parse_illumina.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+
+import os
+import yaml
+import sys
+from datetime import datetime
+from pathlib import Path
+import xmltodict
+
+
+# Sentinel telling "tag absent" apart from a tag whose parsed value is None.
+_NOT_FOUND = object()
+
+
+def read_run_parameters(directory):
+    """Parse the run parameters XML file found in *directory*.
+
+    ``runParameters.xml`` is tried first, then ``RunParameters.xml``; the
+    first name that exists is read and parsed.
+
+    Args:
+        directory: ``pathlib.Path`` of the run directory.
+
+    Returns:
+        Whatever ``xmltodict.parse`` returns for the file contents.
+
+    Raises:
+        FileNotFoundError: If neither file name exists in *directory*.
+    """
+    for filename in ("runParameters.xml", "RunParameters.xml"):
+        candidate = directory / filename
+        if candidate.exists():
+            with open(candidate) as f:
+                return xmltodict.parse(f.read())
+    # FileNotFoundError is still an Exception, so existing handlers keep working.
+    raise FileNotFoundError("[Rr]unParameters.xml not found!")
+
+
+def find(d, tag):
+    """Yield every value stored under key *tag* anywhere inside *d*.
+
+    A match directly in *d* is yielded first. The values of *d* are then
+    searched in order, descending into nested dicts and into the items of
+    lists. If *d* is not a dict, nothing is yielded.
+    """
+    if not isinstance(d, dict):
+        return
+    if tag in d:
+        yield d[tag]
+    for value in d.values():
+        if isinstance(value, dict):
+            yield from find(value, tag)
+        elif isinstance(value, list):
+            for item in value:
+                yield from find(item, tag)
+
+
+def construct_data(run_parameters):
+    """Collect selected run parameters under human-readable labels.
+
+    Args:
+        run_parameters: Nested dict/list structure, as returned by
+            ``read_run_parameters``.
+
+    Returns:
+        Dict mapping each label to the first value that ``find`` yields for
+        one of its tags. Labels whose tags are all absent are left out.
+    """
+    # Several tag spellings share one label; when more than one spelling is
+    # present, the one listed last here overwrites the earlier ones.
+    run_parameters_tags = {
+        "RunId": "Run ID",
+        "RunID": "Run ID",
+        "InstrumentType": "Instrument type",
+        "ApplicationName": "Control software",
+        "Application": "Control software",
+        "ApplicationVersion": "Control software version",
+        "SystemSuiteVersion": "Control software version",
+        "Flowcell": "Flowcell type",
+        "FlowCellMode": "Flowcell type",
+        "ReagentKitVersion": "Reagent kit version",
+        "RTAVersion": "RTA Version",
+        "RtaVersion": "RTA Version",
+    }
+    data = {}
+    # One pass over the tags is enough; repeating the scan cannot change the
+    # result. Only the first match per tag is needed, so take it lazily.
+    for tag, label in run_parameters_tags.items():
+        first_match = next(find(run_parameters, tag), _NOT_FOUND)
+        if first_match is not _NOT_FOUND:
+            data[label] = first_match
+    return data
+
+
+def construct_multiqc_yaml(directory):
+    """Build the MultiQC custom-data structure for the run in *directory*.
+
+    Args:
+        directory: ``pathlib.Path`` of the run directory; its ``name`` is used
+            as the section description.
+
+    Returns:
+        Nested dict with the table configuration and the values gathered by
+        ``construct_data``, ready to be dumped as YAML.
+
+    Raises:
+        FileNotFoundError: Propagated from ``read_run_parameters``.
+    """
+    directory_name = directory.name
+    run_parameters = read_run_parameters(directory)
+
+    data = construct_data(run_parameters)
+
+    # TODO: MultiQC currently ignores the data in this yaml RUDE
+    # NOTE(review): the "custom_data" wrapper looks like the multiqc_config.yaml
+    # layout; a *_mqc.yml file presumably needs the section keys at top level,
+    # which may be why the data is ignored - confirm against the MultiQC docs.
+    metadata = {
+        "custom_data": {
+            "my_data_type": {
+                "id": "mqc_seq_metadata",
+                "section_name": "Sequencing instrument metadata",
+                "description": directory_name,
+                "plot_type": "table",
+                "pconfig": {
+                    "id": 'custom_table',
+                    "title": 'Custom Table',
+                    "no_headers": "true",
+                },
+                "data": data,
+            }
+        }
+    }
+
+    return metadata
+
+
+def main():
+    """Write ``illumina_mqc.yml`` for the run directory named in ``sys.argv[1]``."""
+    if len(sys.argv) < 2:
+        sys.exit(f"usage: {Path(sys.argv[0]).name} RUNDIR")
+    rundir_path = Path(sys.argv[1])
+    output_file = "illumina_mqc.yml"
+
+    multiqc_yaml = construct_multiqc_yaml(rundir_path)
+
+    with open(output_file, "w") as f:
+        yaml.dump(multiqc_yaml, f)
+
+
+if __name__ == "__main__":
+    main()