|
6 | 6 |
|
7 | 7 | parser = optparse.OptionParser() |
8 | 8 | parser.add_option('-s', '--species', dest="input_species_filename", |
9 | | - help='Species list in text format one species in each line') |
| 9 | + help='List of species names or taxids in text format one species in each line') |
10 | 10 | parser.add_option('-d', '--database', dest="database", default=None, |
11 | 11 | help='ETE sqlite database to use (default: ~/.etetoolkit/taxa.sqlite)') |
12 | 12 | parser.add_option('-o', '--output', dest="output", help='output file name (default: stdout)') |
|
19 | 19 | parser.error("-s option must be specified, Species list in text format one species in each line") |
20 | 20 |
|
21 | 21 | ncbi = NCBITaxa(dbfile=options.database) |
22 | | -with open(options.input_species_filename) as f: |
23 | | - species_name = [_.strip().replace('_', ' ') for _ in f.readlines()] |
24 | | - |
25 | | -name2taxid = ncbi.get_name_translator(species_name) |
26 | 22 |
|
27 | | -taxid = [name2taxid[_][0] for _ in species_name] |
28 | | - |
29 | | -tree = ncbi.get_topology(taxid) |
| 23 | +# determine taxids and species names in the input file |
| 24 | +names = [] |
| 25 | +taxids = [] |
| 26 | +with open(options.input_species_filename) as f: |
| 27 | + for species in f: |
| 28 | + species = species.strip().replace('_', ' ') |
| 29 | + try: |
| 30 | + taxids.append(int(species)) |
| 31 | + except ValueError: |
| 32 | + names.append(species) |
| 33 | +# translate all species names to taxids |
| 34 | +name2taxid = ncbi.get_name_translator(names) |
| 35 | +taxids += {name2taxid[n][0] for n in names} |
| 36 | + |
| 37 | +# get topology and set the scientific name as output |
| 38 | +tree = ncbi.get_topology(taxids) |
| 39 | +for isleaf, node in tree.iter_prepostorder(): |
| 40 | + node.name = node.sci_name |
30 | 41 |
|
31 | 42 | if options.treebest == "yes": |
32 | | - inv_map = {str(v[0]): k.replace(" ", "") + "*" for k, v in name2taxid.items()} |
33 | | -else: |
34 | | - inv_map = {str(v[0]): k for k, v in name2taxid.items()} |
35 | | - |
36 | | - |
37 | | -for leaf in tree: |
38 | | - leaf.name = inv_map[leaf.name] |
| 43 | + for leaf in tree: |
| 44 | + leaf.name = leaf.name.replace(" ", "") + "*" |
39 | 45 |
|
40 | 46 | newickTree = tree.write(format=int(options.format)) |
41 | | - |
| 47 | +# print(type(tree)) |
42 | 48 | if options.treebest == "yes": |
43 | 49 | newickTree = newickTree.rstrip(';') |
44 | 50 | newickTree = newickTree + "root;" |
|
0 commit comments