Skip to content

Commit 96b96cd

Browse files
authored
Add samtools/splitheader and deprecate samtools/getrg (#9250)
feat(samtools/splitheader): add splitheader module and deprecate samtools/getrg
1 parent af27af1 commit 96b96cd

File tree

8 files changed

+280
-40
lines changed

8 files changed

+280
-40
lines changed
Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
def deprecation_message = """
2+
WARNING: This module has been deprecated. Please use nf-core/modules/samtools/splitheader
3+
4+
Reason:
5+
This module has been renamed to samtools/splitheader, which has the same functionality but
6+
extends the outputs to include other types of SAM header.
7+
"""
8+
19
process SAMTOOLS_GETRG {
210
tag "$meta.id"
311
label 'process_low'
@@ -19,28 +27,13 @@ process SAMTOOLS_GETRG {
1927

2028
script:
2129
def args = task.ext.args ?: ''
30+
assert false: deprecation_message
2231
"""
23-
samtools \\
24-
view \\
25-
-H \\
26-
$args \\
27-
$input \\
28-
| grep '^@RG' > readgroups.txt
29-
30-
cat <<-END_VERSIONS > versions.yml
31-
"${task.process}":
32-
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
33-
END_VERSIONS
3432
"""
3533

3634
stub:
3735
def prefix = task.ext.prefix ?: "${meta.id}"
36+
assert false: deprecation_message
3837
"""
39-
40-
echo -e "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:ILLUMINA" > readgroups.txt
41-
cat <<-END_VERSIONS > versions.yml
42-
"${task.process}":
43-
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
44-
END_VERSIONS
4538
"""
4639
}

modules/nf-core/samtools/getrg/tests/main.nf.test

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,7 @@ nextflow_process {
2626

2727
then {
2828
assertAll(
29-
{ assert process.success },
30-
{ assert snapshot(
31-
file(process.out.readgroup[0][1]).readLines(),
32-
process.out.versions
33-
).match()
34-
}
29+
{ assert process.failed }
3530
)
3631
}
3732
}

modules/nf-core/samtools/getrg/tests/main.nf.test.snap

Lines changed: 0 additions & 17 deletions
This file was deleted.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
# renovate: datasource=conda depName=bioconda/htslib
8+
- bioconda::htslib=1.22.1
9+
# renovate: datasource=conda depName=bioconda/samtools
10+
- bioconda::samtools=1.22.1
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
process SAMTOOLS_SPLITHEADER {
2+
tag "$meta.id"
3+
label 'process_single'
4+
5+
conda "${moduleDir}/environment.yml"
6+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
7+
'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' :
8+
'biocontainers/samtools:1.22.1--h96c455f_0' }"
9+
10+
input:
11+
tuple val(meta), path(input)
12+
13+
output:
14+
tuple val(meta), path("*_readgroups.txt"), emit: readgroup
15+
tuple val(meta), path("*_programs.txt") , emit: programs
16+
tuple val(meta), path("*_sequences.txt") , emit: sequences
17+
path "versions.yml" , emit: versions
18+
19+
when:
20+
task.ext.when == null || task.ext.when
21+
22+
script:
23+
def args = task.ext.args ?: ''
24+
def prefix = task.ext.prefix ?: "${meta.id}"
25+
"""
26+
samtools \\
27+
view \\
28+
-H \\
29+
$args \\
30+
$input \\
31+
| tee \\
32+
>( grep '^@RG' > ${prefix}_readgroups.txt ) \
33+
>( grep '^@PG' > ${prefix}_programs.txt ) \
34+
>( grep '^@SQ' > ${prefix}_sequences.txt ) \
35+
> /dev/null
36+
37+
cat <<-END_VERSIONS > versions.yml
38+
"${task.process}":
39+
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
40+
END_VERSIONS
41+
"""
42+
43+
stub:
44+
def prefix = task.ext.prefix ?: "${meta.id}"
45+
"""
46+
echo -e "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:ILLUMINA" > ${prefix}_readgroups.txt
47+
echo -e "@PG\\tID:samtools.4\\tPN:samtools\\tPP:samtools.3\\tVN:1.22.1\\tCL:samtools view -H ${input}" > ${prefix}_programs.txt
48+
echo -e "@SQ\\tSN:chr1\\tLN:10000" > ${prefix}_sequences.txt
49+
50+
cat <<-END_VERSIONS > versions.yml
51+
"${task.process}":
52+
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
53+
END_VERSIONS
54+
"""
55+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
name: samtools_splitheader
2+
description: Extract header lines from a SAM/BAM/CRAM file into separate files
3+
depending on type
4+
keywords:
5+
- view
6+
- bam
7+
- sam
8+
- cram
9+
- readgroup
10+
- program
11+
- sequence
12+
- header
13+
tools:
14+
- samtools:
15+
description: |
16+
SAMtools is a set of utilities for interacting with and post-processing
17+
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
18+
These files are generated as output by short read aligners like BWA.
19+
homepage: http://www.htslib.org/
20+
documentation: http://www.htslib.org/doc/samtools.html
21+
doi: 10.1093/bioinformatics/btp352
22+
licence: ["MIT"]
23+
identifier: biotools:samtools
24+
input:
25+
- - meta:
26+
type: map
27+
description: |
28+
Groovy Map containing sample information
29+
e.g. [ id:'test', single_end:false ]
30+
- input:
31+
type: file
32+
description: BAM/CRAM/SAM file
33+
pattern: "*.{bam,cram,sam}"
34+
ontologies:
35+
- edam: http://edamontology.org/format_2573 # SAM
36+
- edam: http://edamontology.org/format_2572 # BAM
37+
- edam: http://edamontology.org/format_3462 # CRAM
38+
output:
39+
readgroup:
40+
- - meta:
41+
type: map
42+
description: |
43+
Groovy Map containing sample information
44+
e.g. [ id:'test', single_end:false ]
45+
- "*_readgroups.txt":
46+
type: file
47+
description: |
48+
Text file containing read group (@RG) lines from SAM header
49+
ontologies:
50+
- edam: http://edamontology.org/format_2330 # Textual format
51+
programs:
52+
- - meta:
53+
type: map
54+
description: |
55+
Groovy Map containing sample information
56+
e.g. [ id:'test', single_end:false ]
57+
- "*_programs.txt":
58+
type: file
59+
description: |
60+
Text file containing program (@PG) lines from SAM header
61+
ontologies:
62+
- edam: http://edamontology.org/format_2330 # Textual format
63+
sequences:
64+
- - meta:
65+
type: map
66+
description: |
67+
Groovy Map containing sample information
68+
e.g. [ id:'test', single_end:false ]
69+
- "*_sequences.txt":
70+
type: file
71+
description: |
72+
Text file containing sequence (@SQ) lines from SAM header
73+
ontologies:
74+
- edam: http://edamontology.org/format_2330 # Textual format
75+
versions:
76+
- versions.yml:
77+
type: file
78+
description: File containing software versions
79+
pattern: "versions.yml"
80+
ontologies:
81+
- edam: http://edamontology.org/format_3750 # YAML
82+
authors:
83+
- "@matthdsm"
84+
- "@prototaxites"
85+
maintainers:
86+
- "@matthdsm"
87+
- "@prototaxites"
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
2+
nextflow_process {
3+
4+
name "Test Process SAMTOOLS_SPLITHEADER"
5+
script "../main.nf"
6+
process "SAMTOOLS_SPLITHEADER"
7+
8+
tag "modules"
9+
tag "modules_nfcore"
10+
tag "samtools"
11+
tag "samtools/splitheader"
12+
13+
test("test-samtools-splitheader") {
14+
15+
when {
16+
process {
17+
"""
18+
input[0] = [
19+
[ id:'test', single_end:false ], // meta map
20+
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
21+
]
22+
23+
"""
24+
}
25+
}
26+
27+
then {
28+
assertAll(
29+
{ assert process.success },
30+
{ assert snapshot(
31+
file(process.out.readgroup[0][1]).readLines(),
32+
file(process.out.programs[0][1]).readLines(),
33+
file(process.out.sequences[0][1]).readLines(),
34+
process.out.versions
35+
).match()
36+
}
37+
)
38+
}
39+
}
40+
41+
test("test-samtools-splitheader - stub") {
42+
43+
options "-stub"
44+
45+
when {
46+
process {
47+
"""
48+
input[0] = [
49+
[ id:'test', single_end:false ], // meta map
50+
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
51+
]
52+
53+
"""
54+
}
55+
}
56+
57+
then {
58+
assertAll(
59+
{ assert process.success },
60+
{ assert snapshot(
61+
file(process.out.readgroup[0][1]).readLines(),
62+
file(process.out.programs[0][1]).readLines(),
63+
file(process.out.sequences[0][1]).readLines(),
64+
process.out.versions
65+
).match()
66+
}
67+
)
68+
}
69+
}
70+
71+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"test-samtools-splitheader": {
3+
"content": [
4+
[
5+
"@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1"
6+
],
7+
[
8+
"@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz tests/data/fastq/dna/sarscov2_2.fastq.gz",
9+
"@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam",
10+
"@PG\tID:samtools.1\tPN:samtools\tPP:samtools\tVN:1.22.1\tCL:samtools view -H test.paired_end.bam"
11+
],
12+
[
13+
"@SQ\tSN:MT192765.1\tLN:29829"
14+
],
15+
[
16+
"versions.yml:md5,1c199f83f54556a51c500ed49ea6102a"
17+
]
18+
],
19+
"meta": {
20+
"nf-test": "0.9.3",
21+
"nextflow": "25.04.2"
22+
},
23+
"timestamp": "2025-10-27T10:16:40.848026"
24+
},
25+
"test-samtools-splitheader - stub": {
26+
"content": [
27+
[
28+
"@RG\tID:test\tSM:test\tPL:ILLUMINA"
29+
],
30+
[
31+
"@PG\tID:samtools.4\tPN:samtools\tPP:samtools.3\tVN:1.22.1\tCL:samtools view -H test.paired_end.bam"
32+
],
33+
[
34+
"@SQ\tSN:chr1\tLN:10000"
35+
],
36+
[
37+
"versions.yml:md5,1c199f83f54556a51c500ed49ea6102a"
38+
]
39+
],
40+
"meta": {
41+
"nf-test": "0.9.3",
42+
"nextflow": "25.04.2"
43+
},
44+
"timestamp": "2025-10-27T10:16:43.794756"
45+
}
46+
}

0 commit comments

Comments
 (0)