Skip to content

Commit 114c3fd

Browse files
committed
2.6 draft 5
* Un-hide fungal species from NCBI and GBIF (this recovers about 2000 OTUs)
* Rewrite of uniquename generation logic, issue #38
1 parent b310e6b commit 114c3fd

File tree

4 files changed

+73
-31
lines changed

4 files changed

+73
-31
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Get it from http://files.opentreeoflife.org/ott/
66
# and if there's a file "taxonomy" change that to "taxonomy.tsv".
77

8-
WHICH=2.6draft4
8+
WHICH=2.6draft5
99
PREV_WHICH=2.5
1010

1111
# $^ = all prerequisites

make-ott.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@
110110
# mark them 'hidden' so they can be suppressed downstream. This
111111
# preserves the identifier assignments, which may have been used
112112
# somewhere.
113-
ncbi.taxon('Fungi').hideDescendants()
113+
ncbi.taxon('Fungi').hideDescendantsToRank('species')
114114

115115
#ott.same(ncbi.taxon('Cyanobacteria'), silva.taxon('D88288/#3'))
116116
ott.notSame(ncbi.taxon('Burkea'), fung.taxon('Burkea'))
@@ -134,7 +134,7 @@
134134
gbif.smush()
135135

136136
# Fungi suppressed at David Hibbett's request
137-
gbif.taxon('Fungi').hideDescendants()
137+
gbif.taxon('Fungi').hideDescendantsToRank('species')
138138

139139
# Microbes suppressed at Laura Katz's request
140140
gbif.taxon('Bacteria','life').hideDescendants()
@@ -183,7 +183,7 @@
183183
irmng.smush()
184184

185185
# Fungi suppressed at David Hibbett's request
186-
irmng.taxon('Fungi').hideDescendants()
186+
irmng.taxon('Fungi').hideDescendantsToRank('species')
187187

188188
irmng.taxon('1413316').prune() #Neopithecus in Mammalia
189189
irmng.taxon('1413315').extinct() #Neopithecus in Primates (Pongidae)

org/opentreeoflife/smasher/Taxon.java

Lines changed: 66 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,20 +1003,47 @@ String uniqueName() {
10031003
this.parent == null ? "(roots)" : this.parent.name);
10041004
return "?";
10051005
}
1006-
for (Taxon other : nodes)
1007-
if (other != this && other.name.equals(this.name)) {
1006+
boolean homonymp = false;
1007+
boolean informativeFail = false;
1008+
1009+
if (true) {
1010+
for (Taxon other : nodes)
1011+
if (other != this && other.name.equals(this.name)) {
1012+
homonymp = true;
1013+
Taxon i = this.informative();
1014+
if (i != null && i.equals(other.informative())) {
1015+
informativeFail = true;
1016+
break;
1017+
}
1018+
}
1019+
if (informativeFail || homonymp) {
1020+
String urank = "";
1021+
if (this.rank != null) urank = this.rank + " ";
1022+
if (informativeFail)
1023+
urank = urank + " " + this.sourceIds.get(0);
1024+
10081025
Taxon i = this.informative();
1009-
if ((i != other.informative() &&
1010-
i != null &&
1011-
!this.name.endsWith(" sp."))) {
1012-
String urank = "";
1013-
if (this.rank != null) urank = this.rank + " ";
1014-
String irank = "";
1015-
if (i.rank != null) irank = i.rank + " ";
1016-
return this.name + " (" + urank + "in " + irank + i.name + ")";
1017-
} else
1018-
return this.name + " (" + this.getSourceIdsString() + ")";
1019-
}
1026+
String irank = "";
1027+
if (i.rank != null) irank = i.rank + " ";
1028+
return this.name + " (" + urank + "in " + irank + i.name + ")";
1029+
} else return "";
1030+
}
1031+
else
1032+
// Old buggy version, delete after above has been tested
1033+
for (Taxon other : nodes)
1034+
if (other != this && other.name.equals(this.name)) {
1035+
Taxon i = this.informative();
1036+
if ((i != other.informative() &&
1037+
i != null &&
1038+
!this.name.endsWith(" sp."))) {
1039+
String urank = "";
1040+
if (this.rank != null) urank = this.rank + " ";
1041+
String irank = "";
1042+
if (i.rank != null) irank = i.rank + " ";
1043+
return this.name + " (" + urank + "in " + irank + i.name + ")";
1044+
} else
1045+
return this.name + " (" + this.getSourceIdsString() + ")";
1046+
}
10201047
return "";
10211048
}
10221049

@@ -1101,6 +1128,20 @@ Taxon sample(int k, Taxonomy tax) {
11011128
return sam;
11021129
}
11031130

1131+
public boolean isHidden() {
1132+
return ((this.properFlags | this.inheritedFlags) &
1133+
(Taxonomy.HIDDEN |
1134+
Taxonomy.EXTINCT |
1135+
Taxonomy.MAJOR_RANK_CONFLICT |
1136+
Taxonomy.TATTERED |
1137+
Taxonomy.NOT_OTU |
1138+
Taxonomy.HYBRID |
1139+
Taxonomy.VIRAL |
1140+
Taxonomy.UNCLASSIFIED |
1141+
Taxonomy.ENVIRONMENTAL |
1142+
Taxonomy.INCERTAE_SEDIS)) != 0;
1143+
}
1144+
11041145
// ----- Methods intended for use in jython scripts -----
11051146

11061147
// Patch system commands are add, move, synonym, prune, fold, flag
@@ -1157,6 +1198,17 @@ public void hideDescendants() {
11571198
}
11581199
}
11591200

1201+
// Hide up to but not including the given rank
1202+
public void hideDescendantsToRank(String rank) {
1203+
if (this.children != null)
1204+
for (Taxon child : this.children) {
1205+
if (child.rank != null && !child.rank.equals(rank)) {
1206+
child.properFlags = Taxonomy.HIDDEN;
1207+
child.hideDescendantsToRank(rank);
1208+
}
1209+
}
1210+
}
1211+
11601212
public void synonym(String name) {
11611213
this.taxonomy.addSynonym(name, this);
11621214
}
@@ -1176,7 +1228,7 @@ public void elide() {
11761228
System.err.format("** %s is a root\n", this);
11771229
else {
11781230
if (this.children == null)
1179-
System.err.format("** Warning: %s has no children\n", this);
1231+
; //System.err.format("** Warning: %s has no children\n", this);
11801232
else
11811233
for (Taxon child : new ArrayList<Taxon>(this.children))
11821234
child.changeParent(this.parent);

org/opentreeoflife/smasher/Taxonomy.java

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -725,18 +725,8 @@ void dumpSynonyms(String filename, String sep) throws IOException {
725725
void dumpHidden(String filename) throws IOException {
726726
PrintStream out = Taxonomy.openw(filename);
727727
for (Taxon node : this) {
728-
if (((node.properFlags | node.inheritedFlags) &
729-
(Taxonomy.HIDDEN |
730-
Taxonomy.EXTINCT |
731-
Taxonomy.MAJOR_RANK_CONFLICT |
732-
Taxonomy.TATTERED |
733-
Taxonomy.NOT_OTU |
734-
Taxonomy.HYBRID |
735-
Taxonomy.VIRAL |
736-
Taxonomy.UNCLASSIFIED |
737-
Taxonomy.ENVIRONMENTAL |
738-
Taxonomy.INCERTAE_SEDIS)) != 0) {
739-
out.format("%s\t%s\t%s\t", node.id, node.name, node.division);
728+
if (node.isHidden()) {
729+
out.format("%s\t%s\t%s\t%s\t", node.id, node.name, node.getSourceIdsString(), node.division);
740730
this.printFlags(node, out);
741731
out.println();
742732
}
@@ -966,7 +956,7 @@ static void analyzeContainers(Taxon node, int inheritedFlags) {
966956
boolean elidep = false;
967957

968958
if (unclassifiedRegex.matcher(node.name).find()) {// Rule 3+5
969-
node.properFlags |= UNCLASSIFIED;
959+
node.properFlags |= UNCLASSIFIED; // includes uncultured
970960
elidep = true;
971961
}
972962
if (environmentalRegex.matcher(node.name).find()) {// Rule 3+5

0 commit comments

Comments
 (0)