Skip to content

Commit b1e26e7

Browse files
Adding a new tool: read2tree (#7318)
* Added read2tree * Added bio.tools xref to read2tree * Read2tree: Fixed a best practices violation for the linter * read2tree: removed temporary test output * Added .shed.xml to read2tree * Changed package version * Renamed .shed file * Addressed pull request comments * Correcting format requirements for all parameters * Added ftype for test files * Remove python_template_version --------- Co-authored-by: M Bernt <[email protected]>
1 parent 474cb80 commit b1e26e7

25 files changed

+4276
-0
lines changed

tools/read2tree/.shed.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Tool Shed metadata for the read2tree Galaxy wrapper.
name: read2tree
owner: nromashchenko
description: Infer a species tree from sequencing reads.
homepage_url: https://github.com/DessimozLab/read2tree
long_description: |
  read2tree is a software tool for generating alignment matrices and performing tree inference directly from sequencing reads. It leverages the OMA database and a set of input reads, bypassing many of the standard steps typically required in phylogenomic analysis. In particular, it avoids the need for read filtering, assembly, gene prediction, gene annotation, all-vs-all comparison, orthology prediction, alignment, and concatenation.

  See https://www.nature.com/articles/s41587-023-01753-4.

remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/read2tree
categories:
  - "Phylogenetics"

tools/read2tree/read2tree.xml

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
<tool id="read2tree" name="Read2Tree" version="@TOOL_VERSION@" profile="24.0">
    <!--
        Galaxy wrapper for read2tree.
        Inputs:  one or more read files, a list collection of marker gene
                 FASTA files, and a DNA reference FASTA.
        Output:  the Newick tree that read2tree writes into ./output.
    -->
    <description>Infer a species tree from sequencing reads</description>
    <macros>
        <token name="@TOOL_VERSION@">2.0.1</token>
    </macros>

    <xrefs>
        <xref type="bio.tools">read2tree</xref>
    </xrefs>

    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">read2tree</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        #import re

        ## Commands are chained with && so that a failed staging step aborts the
        ## job instead of letting read2tree run on incomplete inputs.
        mkdir -p ./genes_dir &&

        ## Stage every marker gene file under a predictable, shell-safe name.
        #for $i, $f in enumerate($marker_genes)
            ln -s '$f' './genes_dir/${i}.fa' &&
        #end for

        ## Link each read file into the working directory under a sanitised
        ## version of its element identifier: anything other than word
        ## characters, '-' and '.' becomes '_'.
        #for $read in $reads
            #set $readln = re.sub(r'[^\w\-.]', '_', str($read.element_identifier))
            ln -s '$read' './$readln' &&
        #end for

        read2tree --tree --standalone_path genes_dir --reads
        ## The link name must be recomputed from the current loop item ($read)
        ## so that each read file is passed exactly once.
        #for $read in $reads
            #set $readln = re.sub(r'[^\w\-.]', '_', str($read.element_identifier))
            '$readln'
        #end for
        --output_path ./output --dna_reference '$dna_ref'
    ]]></command>

    <inputs>
        <!-- Explicit names keep the parameter identifiers stable; "argument" documents the underlying CLI flag. -->
        <param name="reads" argument="--reads" type="data" multiple="true"
               format="fasta,fasta.gz,fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz"
               label="Input reads in FASTA or FASTQ"
               help="Set of input reads for the species of interest."/>

        <param name="marker_genes" argument="--standalone_path" type="data_collection" collection_type="list"
               format="fasta,fasta.gz"
               label="Marker gene files"
               help="A set of reference orthologous groups, i.e. protein sequences of the marker genes for the reference species. See https://github.com/DessimozLab/read2tree for an example and information on how to obtain a list of marker genes."/>

        <param name="dna_ref" argument="--dna_reference" type="data" format="fasta,fasta.gz"
               label="DNA reference"
               help="Reference file containing nucleotide sequences for the set of marker genes presented. See https://github.com/DessimozLab/read2tree for detail."/>
    </inputs>

    <outputs>
        <data name="output_tree" format="newick" from_work_dir="output/tree_*.nwk"/>
    </outputs>

    <tests>
        <!-- NOTE(review): gene3 and gene6 both point at OMAGroup_671579.fa; confirm whether gene6 was meant to be a different group. -->
        <test expect_num_outputs="1">
            <param name="reads" value="sample_1.fastq.gz,sample_2.fastq.gz" ftype="fastqsanger.gz"/>
            <param name="marker_genes">
                <collection type="list">
                    <element name="gene1" value="marker_genes/OMAGroup_649157.fa" ftype="fasta"/>
                    <element name="gene2" value="marker_genes/OMAGroup_649216.fa" ftype="fasta"/>
                    <element name="gene3" value="marker_genes/OMAGroup_671579.fa" ftype="fasta"/>
                    <element name="gene4" value="marker_genes/OMAGroup_681083.fa" ftype="fasta"/>
                    <element name="gene5" value="marker_genes/OMAGroup_681195.fa" ftype="fasta"/>
                    <element name="gene6" value="marker_genes/OMAGroup_671579.fa" ftype="fasta"/>
                    <element name="gene7" value="marker_genes/OMAGroup_683078.fa" ftype="fasta"/>
                    <element name="gene8" value="marker_genes/OMAGroup_894224.fa" ftype="fasta"/>
                    <element name="gene9" value="marker_genes/OMAGroup_898327.fa" ftype="fasta"/>
                    <element name="gene10" value="marker_genes/OMAGroup_944789.fa" ftype="fasta"/>
                    <element name="gene11" value="marker_genes/OMAGroup_974829.fa" ftype="fasta"/>
                    <element name="gene12" value="marker_genes/OMAGroup_1001241.fa" ftype="fasta"/>
                    <element name="gene13" value="marker_genes/OMAGroup_1008242.fa" ftype="fasta"/>
                    <element name="gene14" value="marker_genes/OMAGroup_1065415.fa" ftype="fasta"/>
                    <element name="gene15" value="marker_genes/OMAGroup_1121053.fa" ftype="fasta"/>
                    <element name="gene16" value="marker_genes/OMAGroup_1125645.fa" ftype="fasta"/>
                    <element name="gene17" value="marker_genes/OMAGroup_1133018.fa" ftype="fasta"/>
                    <element name="gene18" value="marker_genes/OMAGroup_1151179.fa" ftype="fasta"/>
                    <element name="gene19" value="marker_genes/OMAGroup_1163384.fa" ftype="fasta"/>
                    <element name="gene20" value="marker_genes/OMAGroup_1171372.fa" ftype="fasta"/>
                    <element name="gene21" value="marker_genes/OMAGroup_1188079.fa" ftype="fasta"/>
                </collection>
            </param>
            <param name="dna_ref" value="dna_ref.fa" ftype="fasta"/>
            <output name="output_tree" ftype="newick">
                <assert_contents>
                    <has_text_matching expression="sample_\d+"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
read2tree is a software tool for generating alignment matrices and performing tree inference directly from sequencing reads. It leverages the OMA database and a set of input reads, bypassing many of the standard steps typically required in phylogenomic analysis. In particular, it avoids the need for read filtering, assembly, gene prediction, gene annotation, all-vs-all comparison, orthology prediction, alignment, and concatenation.
    ]]></help>

    <citations>
        <citation type="doi">10.1038/s41587-023-01753-4</citation>
    </citations>
</tool>

0 commit comments

Comments
 (0)