Skip to content

Commit e3db6fd

Browse files
ete3: allow taxids in species tree input
Now one can submit a list of taxids as input (or even a mix of species names and taxids).
1 parent dd31db8 commit e3db6fd

File tree

3 files changed

+25
-19
lines changed

3 files changed

+25
-19
lines changed

tools/ete/ete_species_tree_generator.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
parser = optparse.OptionParser()
88
parser.add_option('-s', '--species', dest="input_species_filename",
9-
help='Species list in text format one species in each line')
9+
help='List of species names or taxids in text format one species in each line')
1010
parser.add_option('-d', '--database', dest="database", default=None,
1111
help='ETE sqlite data base to use (default: ~/.etetoolkit/taxa.sqlite)')
1212
parser.add_option('-o', '--output', dest="output", help='output file name (default: stdout)')
@@ -19,26 +19,32 @@
1919
parser.error("-s option must be specified, Species list in text format one species in each line")
2020

2121
ncbi = NCBITaxa(dbfile=options.database)
22-
with open(options.input_species_filename) as f:
23-
species_name = [_.strip().replace('_', ' ') for _ in f.readlines()]
24-
25-
name2taxid = ncbi.get_name_translator(species_name)
2622

27-
taxid = [name2taxid[_][0] for _ in species_name]
28-
29-
tree = ncbi.get_topology(taxid)
23+
# determine taxids and species names in the input file
24+
names = []
25+
taxids = []
26+
with open(options.input_species_filename) as f:
27+
for species in f:
28+
species = species.strip().replace('_', ' ')
29+
try:
30+
taxids.append(int(species))
31+
except ValueError:
32+
names.append(species)
33+
# translate all species names to taxids
34+
name2taxid = ncbi.get_name_translator(names)
35+
taxids += {name2taxid[n][0] for n in names}
36+
37+
# get topology and set the scientific name as output
38+
tree = ncbi.get_topology(taxids)
39+
for isleaf, node in tree.iter_prepostorder():
40+
node.name = node.sci_name
3041

3142
if options.treebest == "yes":
32-
inv_map = {str(v[0]): k.replace(" ", "") + "*" for k, v in name2taxid.items()}
33-
else:
34-
inv_map = {str(v[0]): k for k, v in name2taxid.items()}
35-
36-
37-
for leaf in tree:
38-
leaf.name = inv_map[leaf.name]
43+
for leaf in tree:
44+
leaf.name = leaf.name.replace(" ", "") + "*"
3945

4046
newickTree = tree.write(format=int(options.format))
41-
47+
# print(type(tree))
4248
if options.treebest == "yes":
4349
newickTree = newickTree.rstrip(';')
4450
newickTree = newickTree + "root;"

tools/ete/ete_species_tree_generator.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<tool id="ete_species_tree_generator" name="ETE species tree generator" version="@VERSION@">
1+
<tool id="ete_species_tree_generator" name="ETE species tree generator" version="@VERSION@+galaxy1">
22
<description>from a list of species using the ETE Toolkit</description>
33
<macros>
44
<import>ete_macros.xml</import>
@@ -21,7 +21,7 @@ python '$__tool_directory__/ete_species_tree_generator.py'
2121
-t $output_format.treebest
2222
]]></command>
2323
<inputs>
24-
<param name="speciesFile" type="data" format="txt" label="Species file" help="List with one species per line" />
24+
<param name="speciesFile" type="data" format="txt" label="Species file" help="List with one species name or taxid per line" />
2525
<param name="database" type="data" format="sqlite" label="(ETE3) Taxonomy Database" help="The sqlite formatted Taxonomy used by ETE3 (which is derived from NCBI taxonomy)" />
2626
<conditional name="output_format">
2727
<param name="treebest" type="select" label="Use in TreeBest" help="Select yes if the species tree is to be used in TreeBest">

tools/ete/test-data/species.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Nomascus leucogenys
2828
Pongo abelii
2929
Gorilla gorilla gorilla
3030
Pan troglodytes
31-
Homo sapiens
31+
9606
3232
Sorex araneus
3333
Erinaceus europaeus
3434
Pteropus vampyrus

0 commit comments

Comments
 (0)