Skip to content

Commit 45b4cc6

Browse files
jfy133 and SPPearce authored
METAPHLAN_METAPHLAN: update to support optional SAM output (nf-core#8937)
* Add new input channel for premade seqid2map file
* Add extra tests for using custom seqid2map file
* Apply suggestions from code review
* Update modules/nf-core/kraken2/build/tests/main.nf.test

Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>

* Add support for SAM file output in MetaPhlAn module
* Relax checks because of variability
* Relax checks because of variability

---------

Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
1 parent dfa786c commit 45b4cc6

File tree

6 files changed

+119
-31
lines changed

6 files changed

+119
-31
lines changed

modules/nf-core/metaphlan/mergemetaphlantables/tests/main.nf.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ nextflow_process {
3333
[ [ id: 'test2', single_end: true], file( params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz',checkIfExists: true ) ],
3434
)
3535
input[1] = UNTAR.out.untar.map{ it[1] }
36+
input[2] = false
3637
"""
3738
}
3839
}

modules/nf-core/metaphlan/metaphlan/main.nf

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,45 @@
11
process METAPHLAN_METAPHLAN {
2-
tag "$meta.id"
2+
tag "${meta.id}"
33
label 'process_medium'
44

55
conda "${moduleDir}/environment.yml"
6-
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
7-
'https://depot.galaxyproject.org/singularity/metaphlan:4.1.1--pyhdfd78af_0' :
8-
'biocontainers/metaphlan:4.1.1--pyhdfd78af_0' }"
6+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
7+
? 'https://depot.galaxyproject.org/singularity/metaphlan:4.1.1--pyhdfd78af_0'
8+
: 'biocontainers/metaphlan:4.1.1--pyhdfd78af_0'}"
99

1010
input:
1111
tuple val(meta), path(input)
1212
path metaphlan_db_latest
13+
val save_samfile
1314

1415
output:
15-
tuple val(meta), path("*_profile.txt") , emit: profile
16-
tuple val(meta), path("*.biom") , emit: biom
17-
tuple val(meta), path('*.bowtie2out.txt'), optional:true, emit: bt2out
18-
path "versions.yml" , emit: versions
16+
tuple val(meta), path("*_profile.txt"), emit: profile
17+
tuple val(meta), path("*.biom"), emit: biom
18+
tuple val(meta), path('*.bowtie2out.txt'), optional: true, emit: bt2out
19+
tuple val(meta), path("*.sam"), optional: true, emit: sam
20+
path "versions.yml", emit: versions
1921

2022
when:
2123
task.ext.when == null || task.ext.when
2224

2325
script:
2426
def args = task.ext.args ?: ''
2527
def prefix = task.ext.prefix ?: "${meta.id}"
26-
def input_type = "$input" =~ /.*\.(fastq|fq)/ ? "--input_type fastq" : "$input" =~ /.*\.(fasta|fna|fa)/ ? "--input_type fasta" : "$input".endsWith(".bowtie2out.txt") ? "--input_type bowtie2out" : "--input_type sam"
27-
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
28-
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
29-
28+
def input_type = "${input}" =~ /.*\.(fastq|fq)/ ? "--input_type fastq" : "${input}" =~ /.*\.(fasta|fna|fa)/ ? "--input_type fasta" : "${input}".endsWith(".bowtie2out.txt") ? "--input_type bowtie2out" : "--input_type sam"
29+
def input_data = ("${input_type}".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "${input}"
30+
def bowtie2_out = "${input_type}" == "--input_type bowtie2out" || "${input_type}" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
31+
def samfile_out = save_samfile ? "-s ${prefix}.sam" : ''
3032
"""
3133
BT2_DB=`find -L "${metaphlan_db_latest}" -name "*rev.1.bt2*" -exec dirname {} \\;`
3234
BT2_DB_INDEX=`find -L ${metaphlan_db_latest} -name "*.rev.1.bt2*" | sed 's/\\.rev.1.bt2.*\$//' | sed 's/.*\\///'`
3335
3436
metaphlan \\
35-
--nproc $task.cpus \\
36-
$input_type \\
37-
$input_data \\
38-
$args \\
39-
$bowtie2_out \\
37+
--nproc ${task.cpus} \\
38+
${input_type} \\
39+
${input_data} \\
40+
${args} \\
41+
${bowtie2_out} \\
42+
${samfile_out} \\
4043
--bowtie2db \$BT2_DB \\
4144
--index \$BT2_DB_INDEX \\
4245
--biom ${prefix}.biom \\
@@ -51,11 +54,12 @@ process METAPHLAN_METAPHLAN {
5154
stub:
5255
def args = task.ext.args ?: ''
5356
def prefix = task.ext.prefix ?: "${meta.id}"
54-
def input_type = "$input" =~ /.*\.(fastq|fq)/ ? "--input_type fastq" : "$input" =~ /.*\.(fasta|fna|fa)/ ? "--input_type fasta" : "$input".endsWith(".bowtie2out.txt") ? "--input_type bowtie2out" : "--input_type sam"
55-
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
56-
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
57-
57+
def input_type = "${input}" =~ /.*\.(fastq|fq)/ ? "--input_type fastq" : "${input}" =~ /.*\.(fasta|fna|fa)/ ? "--input_type fasta" : "${input}".endsWith(".bowtie2out.txt") ? "--input_type bowtie2out" : "--input_type sam"
58+
def input_data = ("${input_type}".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "${input}"
59+
def bowtie2_out = "${input_type}" == "--input_type bowtie2out" || "${input_type}" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
60+
def samfile_out = save_samfile ? "-s ${prefix}.sam" : ''
5861
"""
62+
echo "${args}"
5963
touch ${prefix}.biom
6064
touch ${prefix}_profile.txt
6165

modules/nf-core/metaphlan/metaphlan/meta.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ input:
3737
Note that you will also need to specify `--index` and the database version name (e.g. 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103') in your module.conf ext.args for METAPHLAN_METAPHLAN!
3838
pattern: "*/"
3939
ontologies: []
40+
- save_samfile:
41+
type: boolean
42+
description: |
43+
Whether to save the SAM file produced by MetaPhlAn, containing read alignments to MetaPhlAn database gene sequences
4044
output:
4145
profile:
4246
- - meta:
@@ -75,6 +79,17 @@ output:
7579
files generated with MetaPhlAn versions below 3 )
7680
pattern: "*.{bowtie2out.txt}"
7781
ontologies: []
82+
sam:
83+
- - meta:
84+
type: map
85+
description: |
86+
Groovy Map containing sample information
87+
e.g. [ id:'test', single_end:false ]
88+
- "*.sam":
89+
type: file
90+
description: SAM file produced by MetaPhlAn containing read alignments to MetaPhlAn database gene sequences
91+
pattern: "*.{sam}"
92+
ontologies: []
7893
versions:
7994
- versions.yml:
8095
type: file

modules/nf-core/metaphlan/metaphlan/tests/main.nf.test

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ nextflow_process {
3535
file( params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz',checkIfExists: true )
3636
])
3737
input[1] = UNTAR.out.untar.map{ it[1] }
38+
input[2] = false
3839
"""
3940
}
4041
}
@@ -64,6 +65,7 @@ nextflow_process {
6465
file( params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz',checkIfExists: true ) ]
6566
])
6667
input[1] = UNTAR.out.untar.map{ it[1] }
68+
input[2] = false
6769
"""
6870
}
6971
}
@@ -92,6 +94,7 @@ nextflow_process {
9294
file( params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta',checkIfExists: true )
9395
])
9496
input[1] = UNTAR.out.untar.map{ it[1] }
97+
input[2] = false
9598
"""
9699
}
97100
}
@@ -109,6 +112,37 @@ nextflow_process {
109112
}
110113
}
111114

115+
test("sarscov2 - illumina pair end [fastq] - save sam") {
116+
117+
when {
118+
process {
119+
"""
120+
input[0] = Channel.of([
121+
[ id:'test', single_end:false ], // meta map
122+
[ file( params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz',checkIfExists: true ),
123+
file( params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz',checkIfExists: true )]
124+
])
125+
input[1] = UNTAR.out.untar.map{ it[1] }
126+
input[2] = true
127+
"""
128+
}
129+
}
130+
131+
then {
132+
assertAll(
133+
{ assert process.success },
134+
{ assert snapshot(
135+
path(process.out.profile[0][1]).readLines()[2..5],
136+
path(process.out.biom[0][1]).readLines().last().contains('Biological Observation Matrix'),
137+
process.out.bt2out,
138+
sam(process.out.sam.get(0).get(1)).getFileType(),
139+
process.out.versions
140+
).match()
141+
}
142+
)
143+
}
144+
}
145+
112146
test("sarscov2 - illumina pair end [fastq] - stub") {
113147

114148
options "-stub"
@@ -122,6 +156,7 @@ nextflow_process {
122156
file( params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz',checkIfExists: true )]
123157
])
124158
input[1] = UNTAR.out.untar.map{ it[1] }
159+
input[2] = false
125160
"""
126161
}
127162
}

modules/nf-core/metaphlan/metaphlan/tests/main.nf.test.snap

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424

2525
],
2626
"3": [
27+
28+
],
29+
"4": [
2730
"versions.yml:md5,db17780c9fc65bc70e9641101c47d0e0"
2831
],
2932
"biom": [
@@ -46,17 +49,49 @@
4649
},
4750
"test_profile.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
4851
]
52+
],
53+
"sam": [
54+
4955
],
5056
"versions": [
5157
"versions.yml:md5,db17780c9fc65bc70e9641101c47d0e0"
5258
]
5359
}
5460
],
5561
"meta": {
56-
"nf-test": "0.8.4",
57-
"nextflow": "23.04.1"
62+
"nf-test": "0.9.2",
63+
"nextflow": "25.04.6"
64+
},
65+
"timestamp": "2025-08-21T08:49:40.627850027"
66+
},
67+
"sarscov2 - illumina pair end [fastq] - save sam": {
68+
"content": [
69+
[
70+
"#196 reads processed",
71+
"#SampleID\tMetaphlan_Analysis",
72+
"#clade_name\tNCBI_tax_id\trelative_abundance\tadditional_species",
73+
"UNCLASSIFIED\t-1\t100.0\t"
74+
],
75+
true,
76+
[
77+
[
78+
{
79+
"id": "test",
80+
"single_end": false
81+
},
82+
"test.bowtie2out.txt:md5,8c1bc21e1d8484b5551bf46331d61bd8"
83+
]
84+
],
85+
"SAM",
86+
[
87+
"versions.yml:md5,db17780c9fc65bc70e9641101c47d0e0"
88+
]
89+
],
90+
"meta": {
91+
"nf-test": "0.9.2",
92+
"nextflow": "25.04.6"
5893
},
59-
"timestamp": "2024-08-13T12:51:42.549393"
94+
"timestamp": "2025-08-22T07:33:29.981757425"
6095
},
6196
"sarscov2 - illumina single end [fastq]": {
6297
"content": [

subworkflows/nf-core/fastq_taxonomic_profile_metaphlan/main.nf

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,23 @@ include { METAPHLAN_MERGEMETAPHLANTABLES } from '../../../modules/nf-core/metaph
44

55

66
workflow FASTQ_TAXONOMIC_PROFILE_METAPHLAN {
7-
87
take:
98
ch_fastq
109

1110
main:
1211

1312
ch_versions = Channel.empty()
1413

15-
METAPHLAN_MAKEDB ( )
14+
METAPHLAN_MAKEDB()
1615
ch_versions = ch_versions.mix(METAPHLAN_MAKEDB.out.versions.first())
1716

18-
METAPHLAN_METAPHLAN ( ch_fastq, METAPHLAN_MAKEDB.out.db )
17+
METAPHLAN_METAPHLAN(ch_fastq, METAPHLAN_MAKEDB.out.db, false)
1918
ch_versions = ch_versions.mix(METAPHLAN_METAPHLAN.out.versions.first())
2019

21-
metaphlan_merged_profiles_txt = METAPHLAN_MERGEMETAPHLANTABLES ( METAPHLAN_METAPHLAN.out.profile.map{ [ [id:'all_samples'], it[1] ] }.groupTuple( sort: { it.getName() } ) ).txt
20+
metaphlan_merged_profiles_txt = METAPHLAN_MERGEMETAPHLANTABLES(METAPHLAN_METAPHLAN.out.profile.map { [[id: 'all_samples'], it[1]] }.groupTuple(sort: { it.getName() })).txt
2221
ch_versions = ch_versions.mix(METAPHLAN_MERGEMETAPHLANTABLES.out.versions.first())
2322

2423
emit:
25-
merged_taxa = metaphlan_merged_profiles_txt
26-
27-
versions = ch_versions
24+
merged_taxa = metaphlan_merged_profiles_txt
25+
versions = ch_versions
2826
}

0 commit comments

Comments
 (0)