Skip to content

Commit f624223

Browse files
committed
2.6 final
* Add report on conflicts ('tattered') * Add Cyphellopsis * Fix Epiphloea alignment * Diadasia is not extinct * Beginnings of better flag input processing (not yet deployed)
1 parent 96b24fe commit f624223

File tree

5 files changed

+165
-26
lines changed

5 files changed

+165
-26
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Get it from http://files.opentreeoflife.org/ott/
66
# and if there's a file "taxonomy" change that to "taxonomy.tsv".
77

8-
WHICH=2.6draft5
8+
WHICH=2.6
99
PREV_WHICH=2.5
1010

1111
# $^ = all prerequisites

make-ott.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,18 @@
6161

6262
# Index Fungorum
6363
fung = Taxonomy.getTaxonomy('tax/if/', 'if')
64+
65+
# JAR 2014-04-11 Missing in earlier IF, mistake in later IF -
# extraneous authority string
cyph = fung.taxon('Cyphellopsis')
if cyph is None:
    # Try the spelling that carries the extraneous authority string.
    cyph = fung.taxon('Cyphellopsis Donk 1931')
    if cyph is not None:
        # Found under the bad name: strip the authority.
        cyph.rename('Cyphellopsis')
    else:
        # Not present under either name: create the genus and hand it
        # to Niaceae.
        cyph = fung.newTaxon('Cyphellopsis', 'genus', 'if:17439')
        fung.taxon('Niaceae').take(cyph)
75+
6476
# smush will fold sibling taxa that have the same name.
6577
fung.smush()
6678

@@ -116,6 +128,10 @@
116128
ott.notSame(ncbi.taxon('Burkea'), fung.taxon('Burkea'))
117129
ott.notSame(ncbi.taxon('Coscinium'), fung.taxon('Coscinium'))
118130
ott.notSame(ncbi.taxon('Perezia'), fung.taxon('Perezia'))
131+
132+
# JAR 2014-04-11 Discovered during regression testing
133+
ott.notSame(ncbi.taxon('Epiphloea', 'Rhodophyta'), fung.taxon('Epiphloea', 'Lichinaceae'))
134+
119135
# analyzeOTUs sets flags on questionable taxa ("unclassified",
120136
# hybrids, and so on) to allow the option of suppression downstream
121137
ncbi.analyzeOTUs()
@@ -410,15 +426,15 @@
410426
# TBD: Change species names from Norops X to Anolis X for all X
411427
ott.taxon('Anolis').absorb(ott.taxon('Norops', 'Iguanidae'))
412428

413-
# JAR 2014-4-08 - these are in study OTUs - see IRMNG
429+
# JAR 2014-04-08 - these are in study OTUs - see IRMNG
414430
ott.taxon('Inseliellum').extant()
415431
ott.taxon('Conus', 'Gastropoda').extant()
416432
ott.taxon('Patelloida').extant()
417433
ott.taxon('Phyllanthus', 'Phyllanthaceae').extant()
418434
ott.taxon('Stelis','Orchidaceae').extant()
419435
ott.taxon('Chloris', 'Poaceae').extant()
420436
ott.taxon('Acropora', 'Acroporidae').extant()
421-
437+
ott.taxon('Diadasia').extant()
422438

423439
# -----------------------------------------------------------------------------
424440
# Finish up
org/opentreeoflife/smasher/Flag.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package org.opentreeoflife.smasher;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
public enum Flag {
7+
8+
// NCBI - individually troublesome - not sticky - combine using &
9+
NOT_OTU ("not_otu", "not_otu", Taxonomy.NOT_OTU),
10+
VIRAL ("viral", "viral", Taxonomy.VIRAL),
11+
HYBRID ("hybrid", "hybrid", Taxonomy.HYBRID),
12+
13+
// Final analysis...
14+
// Containers - unconditionally so.
15+
INCERTAE_SEDIS ("incertae_sedis_direct", "incertae_sedis_inherited", Taxonomy.INCERTAE_SEDIS),
16+
UNCLASSIFIED ("unclassified_direct", "unclassified_inherited", Taxonomy.UNCLASSIFIED),
17+
ENVIRONMENTAL ("environmental", "environmental_inherited", Taxonomy.ENVIRONMENTAL),
18+
19+
// Set during assembly
20+
HIDDEN ("hidden", "hidden_inherited", Taxonomy.HIDDEN), // combine using &
21+
MAJOR_RANK_CONFLICT ("major_rank_conflict_direct", "major_rank_conflict_inherited", Taxonomy.MAJOR_RANK_CONFLICT), // Parent-dependent. Retain value
22+
23+
// Australopithecus
24+
SIBLING_HIGHER ("sibling_higher", null, Taxonomy.SIBLING_HIGHER),
25+
SIBLING_LOWER ("sibling_lower", null, Taxonomy.SIBLING_LOWER),
26+
27+
TATTERED ("tattered", "tattered_inherited", Taxonomy.TATTERED), // combine using |
28+
EDITED ("edited", null, Taxonomy.EDITED), // combine using |
29+
FORCED_VISIBLE ("forced_visible", null, Taxonomy.FORCED_VISIBLE), // combine using |
30+
EXTINCT ("extinct_direct", "extinct_inherited", Taxonomy.EXTINCT), // combine using |
31+
32+
// Is 'species' or lower rank ('infraspecific' when inherited)
33+
// Unconditional ?
34+
SPECIFIC (null, "infraspecific", Taxonomy.SPECIFIC),
35+
36+
// Opposite of 'barren' - propagated upward
37+
ANYSPECIES (null, null, Taxonomy.ANYSPECIES);
38+
39+
String name, inheritedName;
40+
int bit;
41+
42+
Flag(String name, String inheritedName, int bit) {
43+
this.name = name;
44+
this.inheritedName = inheritedName;
45+
this.bit = bit;
46+
}
47+
48+
static final Map<String, Flag> lookupTable = new HashMap<String, Flag>();
49+
static final Map<String, Flag> lookupInheritedTable = new HashMap<String, Flag>();
50+
static {
51+
for (Flag flag : Flag.values())
52+
lookupTable.put(flag.name, flag);
53+
for (Flag flag : Flag.values())
54+
lookupInheritedTable.put(flag.inheritedName, flag);
55+
}
56+
57+
static Flag lookup(String name) {
58+
return lookupTable.get(name);
59+
}
60+
61+
static Flag lookupInherited(String name) {
62+
return lookupInheritedTable.get(name);
63+
}
64+
65+
}

org/opentreeoflife/smasher/Taxon.java

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -309,12 +309,13 @@ QualifiedId getQualifiedId() {
309309
return new QualifiedId(this.taxonomy.getTag(), this.id);
310310
else {
311311
// What if from a Newick string?
312-
System.err.println("!? [getQualifiedId] Taxon has no id: " + this.name);
313-
return new QualifiedId("?", this.name);
312+
System.err.println("** [getQualifiedId] Taxon has no id, using name: " + this.name);
313+
return new QualifiedId(this.taxonomy.getTag(), this.name);
314314
}
315315
}
316316

317-
// Method on Node, called for every node in the source taxonomy
317+
// Method on Node, called for every node in the source taxonomy.
318+
// Input is node in source taxonomy. Returns node in union taxonomy.
318319
Taxon augment(UnionTaxonomy union) {
319320

320321
Taxon newnode = null;
@@ -380,7 +381,6 @@ Taxon augment(UnionTaxonomy union) {
380381

381382
} else if (this.refinementp(oldChildren, newChildren)) {
382383

383-
384384
// Move the new internal node over to union taxonomy.
385385
// It will end up becoming a descendent of oldParent.
386386
newnode = new Taxon(union);
@@ -408,14 +408,7 @@ Taxon augment(UnionTaxonomy union) {
408408
for (Taxon augChild: newChildren)
409409
newnode.addChild(augChild);
410410

411-
if (this.taxonomy.tag.equals("if")) {
412-
boolean javasux = false;
413-
for (Taxon p = this.parent; p != null; p = p.parent)
414-
if (p.name.equals("Fungi")) { javasux = true; break; }
415-
if (javasux)
416-
for (Taxon o : oldChildren)
417-
System.err.format("** %s losing %s to %s\n", this, o, o.parent);
418-
}
411+
union.reportConflict(oldChildren.get(0), this);
419412

420413
newflags |= Taxonomy.TATTERED;
421414
union.logAndMark(Answer.yes(this, null, "new/tattered", null));
@@ -913,6 +906,7 @@ Taxon scan(Taxonomy other) {
913906
return up;
914907
}
915908

909+
// Use getDepth() only after the tree is in its final form
916910
int depth = -1;
917911
int getDepth() {
918912
if (this.depth < 0) {
@@ -923,12 +917,12 @@ int getDepth() {
923917
}
924918
return this.depth;
925919
}
920+
// Does not cache - depths may change during merge
926921
int measureDepth() { // Robust in presence of insertions
927922
if (this.parent == null)
928-
this.depth = 0;
923+
return 0;
929924
else
930-
this.depth = this.parent.measureDepth() + 1;
931-
return this.depth;
925+
return this.parent.measureDepth() + 1;
932926
}
933927

934928
Taxon mrca(Taxon b) {
@@ -1267,8 +1261,16 @@ public void extinct() {
12671261
}
12681262

12691263
public void extant() {
1264+
boolean wasExtant = true;
12701265
for (Taxon node = this; node != null; node = node.parent)
1271-
this.properFlags &= ~Taxonomy.EXTINCT;
1266+
if ((this.properFlags & Taxonomy.EXTINCT) != 0) {
1267+
this.properFlags &= ~Taxonomy.EXTINCT;
1268+
if (node != this)
1269+
System.err.format("** Ancestor %s of %s was marked extinct\n", node, this);
1270+
wasExtant = false;
1271+
}
1272+
if (wasExtant)
1273+
System.err.format("** Note: %s wasn't marked extinct\n", this);
12721274
}
12731275

12741276
// add a tree to the forest?

org/opentreeoflife/smasher/Taxonomy.java

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -161,13 +161,13 @@ void setTag() {
161161
}
162162
}
163163

164-
Taxon highest(String name) {
164+
Taxon highest(String name) { // See pin()
165165
Taxon best = null;
166166
List<Taxon> l = this.lookup(name);
167167
if (l != null) {
168168
int depth = 1 << 30;
169169
for (Taxon node : l)
170-
if (node.getDepth() < depth) {
170+
if (node.measureDepth() < depth) {
171171
depth = node.getDepth();
172172
best = node;
173173
}
@@ -323,6 +323,8 @@ void loadMetadata(String filename) throws IOException {
323323

324324
abstract void dumpMetadata(String filename) throws IOException;
325325

326+
static Pattern commaPattern = Pattern.compile(",");
327+
326328
// load | dump taxonomy proper
327329

328330
void loadTaxonomyProper(String filename) throws IOException {
@@ -401,11 +403,8 @@ void loadTaxonomyProper(String filename) throws IOException {
401403
addSynonym(rawname, node);
402404
++normalizations;
403405
}
404-
if (this.flagscolumn != null) {
405-
if (parts[this.flagscolumn].contains("extinct"))
406-
// kludge. could be _direct or _inherited
407-
node.properFlags |= Taxonomy.EXTINCT;
408-
}
406+
if (this.flagscolumn != null && parts[this.flagscolumn].length() > 0)
407+
this.parseFlags(parts[this.flagscolumn], node);
409408
}
410409
++row;
411410
if (row % 500000 == 0)
@@ -724,14 +723,17 @@ void dumpSynonyms(String filename, String sep) throws IOException {
724723

725724
void dumpHidden(String filename) throws IOException {
726725
PrintStream out = Taxonomy.openw(filename);
726+
int count = 0;
727727
for (Taxon node : this) {
728728
if (node.isHidden()) {
729+
++count;
729730
out.format("%s\t%s\t%s\t%s\t", node.id, node.name, node.getSourceIdsString(), node.division);
730731
this.printFlags(node, out);
731732
out.println();
732733
}
733734
}
734735
out.close();
736+
System.out.format("| %s hidden taxa\n", count);
735737
}
736738

737739
/*
@@ -852,6 +854,13 @@ void analyze() {
852854
// Opposite of 'barren' - propagated upward
853855
static final int ANYSPECIES = 8 * 1024;
854856

857+
void parseFlags(String flags, Taxon node) {
858+
// String[] tags = commaPattern.split(flags);
859+
if (flags.contains("extinct"))
860+
// kludge. could be _direct or _inherited
861+
node.properFlags |= Taxonomy.EXTINCT;
862+
}
863+
855864
// Returns the node's rank (as an int). In general the return
856865
// value should be >= parentRank, but conceivably funny things
857866
// could happen when combining taxonomies.
@@ -2365,6 +2374,7 @@ public void dump(String outprefix, String sep) throws IOException {
23652374
this.dumpNodes(this.roots, outprefix, sep);
23662375
this.dumpSynonyms(outprefix + "synonyms.tsv", sep);
23672376
this.dumpHidden(outprefix + "hidden.tsv");
2377+
this.dumpConflicts(outprefix + "conflicts.tsv");
23682378
}
23692379

23702380
// Overrides method in Taxonomy class
@@ -2506,6 +2516,52 @@ void logAndReport(Answer answer) {
25062516
answer.x.report(answer.reason, answer.y, answer.witness);
25072517
}
25082518

2519+
// 3799 conflicts as of 2014-04-12
2520+
// unode.comapped.parent == fromparent
2521+
void reportConflict(Taxon unode, Taxon fromparent) {
2522+
conflicts.add(new Conflict(unode, fromparent));
2523+
}
2524+
2525+
List<Conflict> conflicts = new ArrayList<Conflict>();
2526+
void dumpConflicts(String filename) throws IOException {
2527+
PrintStream out = Taxonomy.openw(filename);
2528+
for (Conflict conflict : this.conflicts)
2529+
if (!conflict.unode.isHidden())
2530+
out.println(conflict.toString());
2531+
out.close();
2532+
}
2533+
}
2534+
2535+
class Conflict {
2536+
Taxon unode; // in source taxonomy
2537+
Taxon fromParent; // in union taxonomy
2538+
Conflict(Taxon unode, Taxon fromParent) {
2539+
this.unode = unode; this.fromParent = fromParent;
2540+
}
2541+
public String toString() {
2542+
// cf. Taxon.mrca
2543+
Taxon b = fromParent;
2544+
while (b != null && b.mapped == null)
2545+
b = b.parent;
2546+
b = b.mapped;
2547+
Taxon a = unode;
2548+
int da = a.measureDepth();
2549+
int db = b.measureDepth();
2550+
while (db > da) {
2551+
b = b.parent;
2552+
--db;
2553+
}
2554+
while (da > db) {
2555+
a = a.parent;
2556+
--da;
2557+
}
2558+
while (a != null && a.parent != b.parent) {
2559+
a = a.parent;
2560+
b = b.parent;
2561+
--da;
2562+
}
2563+
return (da + " " + fromParent + " in " + b + " lost child " + unode.comapped + " to " + unode.parent + " in " + a);
2564+
}
25092565
}
25102566

25112567
// For each source node, consider all possible union nodes it might map to

0 commit comments

Comments
 (0)