From 614fef873843206dfb01d19fa9a282c4be35651f Mon Sep 17 00:00:00 2001 From: trangdo Date: Tue, 28 Oct 2025 16:41:17 +0100 Subject: [PATCH 1/3] Hackathon - Add vembrane filter module --- .../nf-core/vembrane/filter/environment.yml | 10 ++ modules/nf-core/vembrane/filter/main.nf | 75 ++++++++++++ modules/nf-core/vembrane/filter/meta.yml | 62 ++++++++++ .../vembrane/filter/tests/main.nf.test | 110 ++++++++++++++++++ 4 files changed, 257 insertions(+) create mode 100644 modules/nf-core/vembrane/filter/environment.yml create mode 100644 modules/nf-core/vembrane/filter/main.nf create mode 100644 modules/nf-core/vembrane/filter/meta.yml create mode 100644 modules/nf-core/vembrane/filter/tests/main.nf.test diff --git a/modules/nf-core/vembrane/filter/environment.yml b/modules/nf-core/vembrane/filter/environment.yml new file mode 100644 index 00000000000..a33c9cdff96 --- /dev/null +++ b/modules/nf-core/vembrane/filter/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # TODO nf-core: List required Conda package(s). + # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + - "bioconda::vembrane=2.4.0" diff --git a/modules/nf-core/vembrane/filter/main.nf b/modules/nf-core/vembrane/filter/main.nf new file mode 100644 index 00000000000..70f06410f89 --- /dev/null +++ b/modules/nf-core/vembrane/filter/main.nf @@ -0,0 +1,75 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process VEMBRANE_FILTER { + tag "$meta.id" + label 'process_single' + + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': + 'biocontainers/vembrane:2.4.0--pyhdfd78af_0' }" + // container "quay.io/biocontainers/vembrane:2.4.0--pyhdfd78af_0" + + input: + tuple val(meta), path(variant) + val(expression) + + output: + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: filtered_variant + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + vembrane filter \\ + ${args} \\ + ${expression} \\ + -o ${prefix}_filtered.vcf \\ + $variant + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vembrane: \$(vembrane --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo $args + + vembrane filter \\ + ${args} \\ + ${expression} \\ + -o ${prefix}_filtered.vcf \\ + $variant + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vembrane: \$(vembrane --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/vembrane/filter/meta.yml b/modules/nf-core/vembrane/filter/meta.yml new file mode 100644 index 00000000000..ab25455c372 --- /dev/null +++ b/modules/nf-core/vembrane/filter/meta.yml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +# # TODO nf-core: Add a description of the module and list keywords +name: "vembrane_filter" +description: write your description here +keywords: + - filter + - table + - sort +tools: + ## TODO nf-core: Add a description and other details for the software below + - "vembrane": + description: "Filter VCF/BCF files with Python expressions." + homepage: "https://github.com/vembrane/vembrane/tree/main" + documentation: "https://github.com/vembrane/vembrane/blob/main/docs/filter.md" + tool_dev_url: "https://github.com/vembrane/vembrane.git" + doi: "10.1093/bioinformatics/btac810" + licence: ["MIT"] + identifier: biotools:vembrane/filter + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variant: + type: file + description: | + Path to the VCF/BCF file to be filtered. + e.g. 'file.vcf', 'file.vcf.gz', 'file.bcf', 'file.bcf.gz' + ontologies: [] + - expression: + type: string + description: | + The filter expression can be any valid python expression that evaluates to a value of type bool. + e.g. 'ANN["SYMBOL"] == "CDH2"' + ontologies: [] + +output: + filtered_variant: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,bcf,vcf.gz,bcf.gz}": + type: file + description: VCF normalized output file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@trangdo-hsc" + - "@mkatsanto" +maintainers: + - "@trangdo-hsc" diff --git a/modules/nf-core/vembrane/filter/tests/main.nf.test b/modules/nf-core/vembrane/filter/tests/main.nf.test new file mode 100644 index 00000000000..c08e6cc4e86 --- /dev/null +++ b/modules/nf-core/vembrane/filter/tests/main.nf.test @@ -0,0 +1,110 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test vembrane +nextflow_process { + + name "Test Process VEMBRANE" + script "../main.nf" + process "VEMBRANE_FILTER" + + tag "modules" + tag "modules_nfcore" + tag "vembrane" + tag "vembrane/filter" + + test("homo sapiens - vcf") { + + // + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf', checkIfExists: true), + ] + input[1] = "'QUAL >= 30'" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.filtered_variant).match() } + ) + } + + } + + test("homo sapiens - vcf.gz") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.vcf.gz', checkIfExists: true), + ] + input[1] = "'QUAL >= 30'" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.filtered_variant + ).match() } + ) + } + + } + + test("homo sapiens - bcf") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.bcf', checkIfExists: true), + ] + input[1] = "'QUAL >= 30'" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.filtered_variant).match() } + ) + } + + } + +test("homo sapiens - bcf.gz") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bcf.gz', checkIfExists: true), + ] + input[1] = "'QUAL >= 30'" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.filtered_variant).match() } + ) + } + + } + +} From 0462840dd48ad439b61b220011d3efe1f671e28a Mon Sep 17 00:00:00 2001 From: trangdo Date: Tue, 28 Oct 2025 16:51:29 +0100 Subject: [PATCH 2/3] Prepare for merge --- modules/nf-core/vembrane/filter/main.nf | 19 ----- modules/nf-core/vembrane/filter/meta.yml | 2 - .../vembrane/filter/tests/main.nf.test | 2 - .../vembrane/filter/tests/main.nf.test.snap | 74 +++++++++++++++++++ 4 files changed, 74 insertions(+), 23 deletions(-) create mode 100644 modules/nf-core/vembrane/filter/tests/main.nf.test.snap diff --git a/modules/nf-core/vembrane/filter/main.nf b/modules/nf-core/vembrane/filter/main.nf index 70f06410f89..30c283667ed 100644 --- a/modules/nf-core/vembrane/filter/main.nf +++ b/modules/nf-core/vembrane/filter/main.nf @@ -1,30 +1,11 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process VEMBRANE_FILTER { tag "$meta.id" label 'process_single' - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': 'biocontainers/vembrane:2.4.0--pyhdfd78af_0' }" - // container "quay.io/biocontainers/vembrane:2.4.0--pyhdfd78af_0" input: tuple val(meta), path(variant) diff --git a/modules/nf-core/vembrane/filter/meta.yml b/modules/nf-core/vembrane/filter/meta.yml index ab25455c372..f88c7391391 100644 --- a/modules/nf-core/vembrane/filter/meta.yml +++ b/modules/nf-core/vembrane/filter/meta.yml @@ -1,5 +1,4 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -# # TODO nf-core: Add a description of the module and list keywords name: "vembrane_filter" description: write your description here keywords: @@ -7,7 +6,6 @@ keywords: - table - sort tools: - ## TODO nf-core: Add a description and other details for the software below - "vembrane": description: "Filter VCF/BCF files with Python expressions." homepage: "https://github.com/vembrane/vembrane/tree/main" diff --git a/modules/nf-core/vembrane/filter/tests/main.nf.test b/modules/nf-core/vembrane/filter/tests/main.nf.test index c08e6cc4e86..ada63541ade 100644 --- a/modules/nf-core/vembrane/filter/tests/main.nf.test +++ b/modules/nf-core/vembrane/filter/tests/main.nf.test @@ -1,5 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: -// nf-core modules test vembrane nextflow_process { name "Test Process VEMBRANE" diff --git a/modules/nf-core/vembrane/filter/tests/main.nf.test.snap b/modules/nf-core/vembrane/filter/tests/main.nf.test.snap new file mode 100644 index 00000000000..17ed0c4d251 --- /dev/null +++ b/modules/nf-core/vembrane/filter/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "homo sapiens - bcf": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,38f3479f9071fbe018226be0aac62354" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.6" + }, + "timestamp": "2025-10-28T16:35:15.82704" + }, + "homo sapiens - vcf.gz": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,44659c255c5018ba2e5a3b4b80797a57" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.6" + }, + "timestamp": "2025-10-28T16:35:10.958678" + }, + "homo sapiens - bcf.gz": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,c9be02b5c5e036c25e05f59a94528203" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.6" + }, + "timestamp": "2025-10-28T16:35:21.012789" + }, + "homo sapiens - vcf": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,20af56305296f2b0e52979465f19804b" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.6" + }, + "timestamp": "2025-10-28T16:35:06.185151" + } +} \ No newline at end of file From d86ad1980eb48cf5bbbb86ab887adc0959449e9b Mon Sep 17 00:00:00 2001 From: trangdo Date: Tue, 28 Oct 2025 17:10:32 +0100 Subject: [PATCH 3/3] Prepare for merge --- .pre-commit-config.yaml | 10 +- .../nf-core/vembrane/filter/environment.yml | 3 - modules/nf-core/vembrane/filter/main.nf | 2 +- .../vembrane/filter/tests/main.nf.test | 8 +- .../vembrane/filter/tests/main.nf.test.snap | 140 +++++++++++++----- 5 files changed, 114 insertions(+), 49 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb017245913..2a556d27b28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,13 +1,13 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.1.0" + rev: "v4.0.0-alpha.8" hooks: - id: prettier additional_dependencies: - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] @@ -22,7 +22,7 @@ repos: )$ - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.33.2 + rev: 0.34.1 hooks: - id: check-jsonschema name: "Match meta.ymls in one of the subdirectories of modules/nf-core" @@ -43,7 +43,7 @@ repos: # use ruff for python files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.9 + rev: v0.14.2 hooks: - id: ruff files: \.py$ @@ -51,7 +51,7 @@ repos: - id: ruff-format # formatter # NOTE This runs with Docker instead of the binary. Hoping anyone messing with Dockerfiles has Docker installed - repo: https://github.com/hadolint/hadolint - rev: v2.12.0 + rev: v2.14.0 hooks: - id: hadolint-docker - repo: https://github.com/nf-core/tools diff --git a/modules/nf-core/vembrane/filter/environment.yml b/modules/nf-core/vembrane/filter/environment.yml index a33c9cdff96..fc61617b4e7 100644 --- a/modules/nf-core/vembrane/filter/environment.yml +++ b/modules/nf-core/vembrane/filter/environment.yml @@ -4,7 +4,4 @@ channels: - conda-forge - bioconda dependencies: - # TODO nf-core: List required Conda package(s). - # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - "bioconda::vembrane=2.4.0" diff --git a/modules/nf-core/vembrane/filter/main.nf b/modules/nf-core/vembrane/filter/main.nf index 30c283667ed..5e56de3830e 100644 --- a/modules/nf-core/vembrane/filter/main.nf +++ b/modules/nf-core/vembrane/filter/main.nf @@ -4,7 +4,7 @@ process VEMBRANE_FILTER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': + 'https://depot.galaxyproject.org/singularity/vembrane:2.4.0--pyhdfd78af_0': 'biocontainers/vembrane:2.4.0--pyhdfd78af_0' }" input: diff --git a/modules/nf-core/vembrane/filter/tests/main.nf.test b/modules/nf-core/vembrane/filter/tests/main.nf.test index ada63541ade..4a3e2f36425 100644 --- a/modules/nf-core/vembrane/filter/tests/main.nf.test +++ b/modules/nf-core/vembrane/filter/tests/main.nf.test @@ -31,7 +31,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.filtered_variant).match() } + { assert snapshot(process.out).match() } ) } @@ -54,7 +54,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.filtered_variant + process.out ).match() } ) } @@ -77,7 +77,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.filtered_variant).match() } + { assert snapshot(process.out).match() } ) } @@ -99,7 +99,7 @@ test("homo sapiens - bcf.gz") { then { assertAll( { assert process.success }, - { assert snapshot(process.out.filtered_variant).match() } + { assert snapshot(process.out).match() } ) } diff --git a/modules/nf-core/vembrane/filter/tests/main.nf.test.snap b/modules/nf-core/vembrane/filter/tests/main.nf.test.snap index 17ed0c4d251..0bc3b16cf7e 100644 --- a/modules/nf-core/vembrane/filter/tests/main.nf.test.snap +++ b/modules/nf-core/vembrane/filter/tests/main.nf.test.snap @@ -1,74 +1,142 @@ { "homo sapiens - bcf": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test_filtered.vcf:md5,38f3479f9071fbe018226be0aac62354" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,38f3479f9071fbe018226be0aac62354" + ] + ], + "1": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" + ], + "filtered_variant": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,38f3479f9071fbe018226be0aac62354" + ] + ], + "versions": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" ] - ] + } ], "meta": { "nf-test": "0.9.3", "nextflow": "25.04.6" }, - "timestamp": "2025-10-28T16:35:15.82704" + "timestamp": "2025-10-29T10:40:44.493576" }, "homo sapiens - vcf.gz": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test_filtered.vcf:md5,44659c255c5018ba2e5a3b4b80797a57" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,44659c255c5018ba2e5a3b4b80797a57" + ] + ], + "1": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" + ], + "filtered_variant": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,44659c255c5018ba2e5a3b4b80797a57" + ] + ], + "versions": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" ] - ] + } ], "meta": { "nf-test": "0.9.3", "nextflow": "25.04.6" }, - "timestamp": "2025-10-28T16:35:10.958678" + "timestamp": "2025-10-29T10:40:40.227725" }, "homo sapiens - bcf.gz": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test_filtered.vcf:md5,c9be02b5c5e036c25e05f59a94528203" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,c9be02b5c5e036c25e05f59a94528203" + ] + ], + "1": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" + ], + "filtered_variant": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,c9be02b5c5e036c25e05f59a94528203" + ] + ], + "versions": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" ] - ] + } ], "meta": { "nf-test": "0.9.3", "nextflow": "25.04.6" }, - "timestamp": "2025-10-28T16:35:21.012789" + "timestamp": "2025-10-29T10:40:49.157452" }, "homo sapiens - vcf": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test_filtered.vcf:md5,20af56305296f2b0e52979465f19804b" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,20af56305296f2b0e52979465f19804b" + ] + ], + "1": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" + ], + "filtered_variant": [ + [ + { + "id": "test", + "single_end": false + }, + "test_filtered.vcf:md5,20af56305296f2b0e52979465f19804b" + ] + ], + "versions": [ + "versions.yml:md5,78268fef6c3acebb6a431844197f42d5" ] - ] + } ], "meta": { "nf-test": "0.9.3", "nextflow": "25.04.6" }, - "timestamp": "2025-10-28T16:35:06.185151" + "timestamp": "2025-10-29T10:40:35.909472" } } \ No newline at end of file