Skip to content
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/rmats/prep/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the rmats/prep module: a single pinned rMATS build,
# resolved with conda-forge taking priority over bioconda.
channels:
  - conda-forge
  - bioconda
dependencies:
  - bioconda::rmats=4.3.0
73 changes: 73 additions & 0 deletions modules/nf-core/rmats/prep/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Runs the rMATS `prep` task (`rmats.py --task prep`) on the BAM file(s) of one
// sample and exposes the files rMATS writes to its --tmp directory as outputs.
//
// Inputs:
//   meta, genome_bam  - sample meta map and the genome-aligned BAM file(s)
//   reference_gtf     - annotation GTF passed to --gtf
//   rmats_read_len    - value passed to --readLength
// Outputs:
//   prep_rmats_file          - the copied `*.rmats` files
//   prep_read_outcomes_file  - the copied `*outcomes_by_bam.txt` files
//   versions_rmats           - tool version, published on the `versions` topic
process RMATS_PREP {
    tag "${meta.id}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/rmats:4.3.0--py311hf2f0b74_5'
        : 'biocontainers/rmats:4.3.0--py311hf2f0b74_5'}"

    input:
    tuple val(meta), path(genome_bam)
    // NOTE: the `post` task seems to need only the BAM *names*, not the files
    // themselves. They could presumably be recovered from the first line of
    // each .rmats file, e.g.
    //   for file in multi_bam_rmats_prep_tmp/*.rmats; do head -1 $file; done | tr '\n' ','
    // NOTE: for the stats step the formula could be parsed with patsy, but if
    // PAIRADISE is included R is available anyway - do it in R on a first pass.
    path reference_gtf
    val rmats_read_len

    output:
    tuple val(meta), path("*.rmats"), emit: prep_rmats_file
    tuple val(meta), path("*outcomes_by_bam.txt"), emit: prep_read_outcomes_file
    tuple val("${task.process}"), val('rmats'), eval('rmats.py --version | sed -e "s/v//g"'), emit: versions_rmats, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The --b1 file must hold a comma-separated list of BAM paths. A single
    // staged file is written unchanged; several staged files are comma-joined
    // instead of being interpolated space-separated.
    def bam_list = genome_bam instanceof List ? genome_bam.join(',') : genome_bam

    // --readLength is a required rMATS parameter ("the length of each read").
    // NOTE: open question - should it be derived per sample (e.g. from
    // `samtools stats`) rather than passed in by the caller?
    // NOTE: open question - should the prefix gain an `rmats_prep` suffix only
    // at (sub)workflow level via modules.config? See
    // https://github.com/nf-core/rnaseq/blob/e049f51f0214b2aef7624b9dd496a404a7c34d14/conf/modules.config#L576
    """
    echo ${bam_list} > ${prefix}.prep.b1.txt

    rmats.py \\
        --task prep \\
        ${args} \\
        --nthread ${task.cpus} \\
        --b1 ${prefix}.prep.b1.txt \\
        --gtf ${reference_gtf} \\
        --readLength ${rmats_read_len} \\
        --tmp ${prefix}_rmats_tmp \\
        --od ${prefix}_rmats_prep

    # Copy everything rMATS wrote to its tmp directory into the work directory
    # under a sample-specific name so the output globs can pick the files up.
    # A plain glob is used instead of parsing \`ls\` output, which word-splits.
    for file in ${prefix}_rmats_tmp/*
    do
        cp "\${file}" "${prefix}_prep_\$(basename "\${file}")"
    done
    """

    // NOTE for `post`: it requires the .rmats files to be inside its tmp
    // directory, otherwise it fails.

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    echo ${args}

    touch ${prefix}.rmats
    touch ${prefix}_outcomes_by_bam.txt
    """
}
88 changes: 88 additions & 0 deletions modules/nf-core/rmats/prep/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Metadata for the rmats/prep module. Input and output entries mirror the
# channel declarations in main.nf (same order, same names, same globs).
name: "rmats_prep"
description: MATS is a computational tool to detect differential alternative splicing events from RNA-Seq data.
keywords:
  - splicing
  - RNA-Seq
  - alternative splicing
  - exon
  - intron
  - rMATS
tools:
  - "rmats":
      description: "MATS is a computational tool to detect differential alternative
        splicing events from RNA-Seq data."
      homepage: "https://github.com/Xinglab/rmats-turbo"
      documentation: "https://github.com/Xinglab/rmats-turbo/blob/v4.3.0/README.md"
      doi: "10.1038/s41596-023-00944-2"
      licence: ["FreeBSD for non-commercial use, see LICENSE file"]
      identifier: biotools:rmats

input:
  - - meta:
        type: map
        description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
    - genome_bam:
        type: file
        description: BAM file aligned to the genome
        pattern: "*.{bam}"
        ontologies:
          - edam: http://edamontology.org/format_2572 # BAM
  - reference_gtf:
      type: file
      description: Annotation GTF file
      pattern: "*.{gtf}"
      ontologies:
        - edam: http://edamontology.org/format_2306 # GTF
  - rmats_read_len:
      type: integer
      description: Read length in bases
output:
  prep_rmats_file:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
      # Key must match the glob used by the `prep_rmats_file` output in main.nf.
      - "*.rmats":
          type: file
          description: text file containing rmats processed splice junctions
          pattern: "*.rmats"
          ontologies: []
  prep_read_outcomes_file:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
      # Key must match the glob used by the `prep_read_outcomes_file` output in main.nf.
      - "*outcomes_by_bam.txt":
          type: file
          description: text file containing the numbers of reads for each outcome (USED, NOT_PAIRED, etc.)
          pattern: "*outcomes_by_bam.txt"
          ontologies:
            - edam: http://edamontology.org/format_2330 # Textual format
  versions_rmats:
    - - ${task.process}:
          type: string
          description: The name of the process
      - rmats:
          type: string
          description: The name of the tool
      - rmats.py --version | sed -e "s/v//g":
          type: eval
          description: The expression to obtain the version of the tool
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
      - rmats:
          type: string
          description: The name of the tool
      - rmats.py --version | sed -e "s/v//g":
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@akaviaLab"
maintainers:
  - "@akaviaLab"
2 changes: 2 additions & 0 deletions modules/nf-core/rmats/prep/optional_parameters
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--variable-read-length
--allow-clipping
74 changes: 74 additions & 0 deletions modules/nf-core/rmats/prep/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// nf-test suite for the RMATS_PREP process.
// Regenerate the snapshot with:
//   nf-core modules test rmats/prep
nextflow_process {

    name "Test Process RMATS_PREP"
    script "../main.nf"
    process "RMATS_PREP"
    // Load the module-specific ext.args defined next to this test file;
    // nf-test does not pick up tests/nextflow.config unless it is declared.
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "rmats"
    tag "rmats/prep"

    test("sarscov2 - bam") {

        when {
            process {
                """
                // RMATS_PREP declares three inputs; all of them must be set.
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                ]
                input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
                // NOTE(review): 150 is an assumed read length for this test BAM - confirm.
                input[2] = 150
                """
            }
        }

        then {
            // NOTE(review): the copied output file names embed whatever names
            // rMATS chose inside its tmp directory; if those are not stable
            // between runs, snapshot selected fields instead of process.out.
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    test("sarscov2 - bam - stub") {

        options "-stub"

        when {
            process {
                """
                // Stub runs still require every declared input to be provided.
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                ]
                input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
                input[2] = 150
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
15 changes: 15 additions & 0 deletions modules/nf-core/rmats/prep/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Test configuration for RMATS_PREP: builds the extra rmats.py command-line
// arguments (task.ext.args) from the sample meta map and pipeline params.
process {

    withName: RMATS_PREP {
        // Evaluated lazily per task so that `meta` refers to the current sample.
        ext.args = {[
            "--variable-read-length --allow-clipping",
            // rmats.py treats reads as paired unless told otherwise.
            meta.single_end ? '-t single' : '',
            // nf-core "forward" (read 1 on the sense strand) corresponds to
            // fr-secondstrand and "reverse" (dUTP-style) to fr-firststrand.
            // NOTE(review): assumes meta.strandness follows the nf-core
            // forward/reverse convention - confirm against the caller.
            meta.strandness == "forward" ? "--libType fr-secondstrand" : '',
            meta.strandness == "reverse" ? "--libType fr-firststrand" : '',
            params.novel_splice_site ? "--novelSS" : "",
            // --mil / --mel are only emitted together with --novelSS.
            (params.novel_splice_site && params.minimum_intron_length) ? "--mil ${params.minimum_intron_length}" : "",
            (params.novel_splice_site && params.max_exon_length) ? "--mel ${params.max_exon_length}" : "",
        ].join(' ').trim()}
    }

}
Loading