Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/samtools/samples/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the samtools/samples module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Pinned to the same samtools release (1.22.1) as the container images
  # referenced in the module's main.nf.
  - "bioconda::samtools=1.22.1"
66 changes: 66 additions & 0 deletions modules/nf-core/samtools/samples/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Runs `samtools samples` on an alignment (plus optional reference FASTA files)
// and writes the resulting table to `<prefix>.tsv`.
//
// Inputs:
//   [ meta,  bam,   bai ] - alignment and its index
//   [ meta2, fasta, fai ] - optional reference(s); pass `[ [], [], [] ]` to omit
// Outputs:
//   tsv      - [ meta, <prefix>.tsv ]
//   versions - versions.yml with the samtools version
process SAMTOOLS_SAMPLES {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0':
        'biocontainers/samtools:1.22.1--h96c455f_0' }"

    input:
    tuple val(meta) , path(bam)  , path(bai)
    tuple val(meta2), path(fasta), path(fai)

    output:
    tuple val(meta), path("*.tsv"), emit: tsv
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // `fasta` is a bare path when exactly one file is staged and a list otherwise.
    // Wrapping and flattening yields one `-f <file>` per reference in both cases;
    // an empty input produces no `-f` option at all.
    def fasta_arg = fasta ? [fasta].flatten().collect { "-f $it" }.join(' ') : ''
    def out_arg = "-o ${prefix}.tsv"
    // The index is only named on the command line when ext.args requests -X.
    def bai_arg = args.contains('-X') ? bai : ''
    """
    samtools samples \\
        $args \\
        $fasta_arg \\
        $out_arg \\
        $bam \\
        $bai_arg

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Write header if requested because the output will likely be parsed by Nextflow.
    // `headers` stays empty (=> empty stub file) unless ext.args asks for a header.
    def headers = ''
    // Match the plain `-h` flag. The previous check looked for the literal
    // text `-h\n` (backslash + n), which never occurs in ext.args, so the
    // header was never written.
    if (args.contains('-h')) {
        headers = "#SM\tPATH"
        // NOTE(review): -X is treated like -i because the samtools-samples
        // manual describes -X as implying -i - confirm against the manual.
        if (args.contains('-i') || args.contains('-X')) {
            headers += "\tINDEX"
        }
        if (fasta) {
            headers += "\tREFERENCE"
        }
        // Terminate the header line; the stub command prints it with `echo -ne`,
        // which expands the escaped `\n` but adds no newline of its own.
        headers += "\\n"
    }
Comment on lines +46 to +55
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have this logic in here instead of just adding all the header columns by default?
Also, if the stub output gets parsed by Nextflow, it will not contain any samples anyway, so no downstream processes will be started.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess if we remove all this logic, it might be better to just write an empty file. Otherwise, the header would confuse downstream tools that do not expect one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine for me!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally we just make empty files. I guess it might be important here for workflow logic downstream though, as @Schmytzi says, so I'm happy to include this here.

// Stub command: echoes the resolved args, writes the (possibly empty) header
// computed above to <prefix>.tsv and records the samtools version.
// `samtools:` is indented so that it nests under the process name in
// versions.yml instead of becoming a second top-level key.
"""
echo $args

echo -ne "$headers" > ${prefix}.tsv

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}
71 changes: 71 additions & 0 deletions modules/nf-core/samtools/samples/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for samtools/samples: describes the tool and the input and
# output channels declared by the process in main.nf.
name: "samtools_samples"
description: |
  Write sample names and path to reference genome of an alignment to a text file.
keywords:
  - samples
  - bam
  - cram
  - genomics
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
      licence: ["MIT"]
      identifier: biotools:samtools

input:
  # First input channel: [ meta, bam, bai ]
  - - meta:
        type: map
        description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
    - bam:
        type: file
        description: alignment file
        pattern: "*.{bam,sam,cram}"
        ontologies:
          - edam: http://edamontology.org/format_2572 # BAM
          - edam: http://edamontology.org/format_2573 # SAM
          - edam: http://edamontology.org/format_3462 # CRAM
    - bai:
        type: file
        description: index file for the alignment
        pattern: "*.{bai,crai}"
  # Second input channel: [ meta2, fasta, fai ]
  - - meta2:
        type: map
        description: Groovy Map containing reference information.
    - fasta:
        type: file
        description: reference genome file (optional)
        pattern: "*.{fasta,fa,fna}"
        ontologies:
          - edam: http://edamontology.org/format_1929 # FASTA
    - fai:
        type: file
        description: index file for the reference genome (optional)
        pattern: "*.fai"
output:
  tsv:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
      - "*.tsv":
          type: file
          description: tab-separated file containing sample names, BAM filename and optionally reference genome path
          pattern: "*.tsv"
  versions:
    - versions.yml:
        type: file
        description: File containing software versions
        pattern: versions.yml
        ontologies:
          - edam: http://edamontology.org/format_3750 # YAML
authors:
  - "@Schmytzi"
maintainers:
  - "@Schmytzi"
115 changes: 115 additions & 0 deletions modules/nf-core/samtools/samples/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// nf-test suite for SAMTOOLS_SAMPLES: runs the process with and without a
// reference FASTA, both for real and in stub mode, and snapshots all outputs.
nextflow_process {

    name "Test Process SAMTOOLS_SAMPLES"
    script "../main.nf"
    process "SAMTOOLS_SAMPLES"

    tag "modules"
    tag "modules_nfcore"
    tag "samtools"
    tag "samtools/samples"

    // Real run, alignment only: the reference channel is passed as empty placeholders.
    test("sarscov2 - [bam, bai], []") {
        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [ [], [], [] ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Real run with the reference genome and its index supplied.
    test("sarscov2 - [bam, bai], [fasta, fai]") {
        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [
                    [ id : 'sarscov2' ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Stub run, alignment only.
    test("sarscov2 - [bam, bai], [] - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [ [], [], [] ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Stub run with the reference genome and its index supplied.
    test("sarscov2 - [bam, bai], [fasta, fai] - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [
                    [ id : 'sarscov2' ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
Loading
Loading