@@ -161,13 +161,13 @@ void setTag() {
161161 }
162162 }
163163
164- Taxon highest (String name ) {
164+ Taxon highest (String name ) { // See pin()
165165 Taxon best = null ;
166166 List <Taxon > l = this .lookup (name );
167167 if (l != null ) {
168168 int depth = 1 << 30 ;
169169 for (Taxon node : l )
170- if (node .getDepth () < depth ) {
170+ if (node .measureDepth () < depth ) {
171171 depth = node .getDepth ();
172172 best = node ;
173173 }
@@ -323,6 +323,8 @@ void loadMetadata(String filename) throws IOException {
323323
324324 abstract void dumpMetadata (String filename ) throws IOException ;
325325
326+ static Pattern commaPattern = Pattern .compile ("," );
327+
326328 // load | dump taxonomy proper
327329
328330 void loadTaxonomyProper (String filename ) throws IOException {
@@ -401,11 +403,8 @@ void loadTaxonomyProper(String filename) throws IOException {
401403 addSynonym (rawname , node );
402404 ++normalizations ;
403405 }
404- if (this .flagscolumn != null ) {
405- if (parts [this .flagscolumn ].contains ("extinct" ))
406- // kludge. could be _direct or _inherited
407- node .properFlags |= Taxonomy .EXTINCT ;
408- }
406+ if (this .flagscolumn != null && parts [this .flagscolumn ].length () > 0 )
407+ this .parseFlags (parts [this .flagscolumn ], node );
409408 }
410409 ++row ;
411410 if (row % 500000 == 0 )
@@ -724,14 +723,17 @@ void dumpSynonyms(String filename, String sep) throws IOException {
724723
725724 void dumpHidden (String filename ) throws IOException {
726725 PrintStream out = Taxonomy .openw (filename );
726+ int count = 0 ;
727727 for (Taxon node : this ) {
728728 if (node .isHidden ()) {
729+ ++count ;
729730 out .format ("%s\t %s\t %s\t %s\t " , node .id , node .name , node .getSourceIdsString (), node .division );
730731 this .printFlags (node , out );
731732 out .println ();
732733 }
733734 }
734735 out .close ();
736+ System .out .format ("| %s hidden taxa\n " , count );
735737 }
736738
737739 /*
@@ -852,6 +854,13 @@ void analyze() {
852854 // Opposite of 'barren' - propagated upward
853855 static final int ANYSPECIES = 8 * 1024 ;
854856
857+ void parseFlags (String flags , Taxon node ) {
858+ // String[] tags = commaPattern.split(flags);
859+ if (flags .contains ("extinct" ))
860+ // kludge. could be _direct or _inherited
861+ node .properFlags |= Taxonomy .EXTINCT ;
862+ }
863+
855864 // Returns the node's rank (as an int). In general the return
856865 // value should be >= parentRank, but conceivably funny things
857866 // could happen when combining taxonomies.
@@ -2365,6 +2374,7 @@ public void dump(String outprefix, String sep) throws IOException {
23652374 this .dumpNodes (this .roots , outprefix , sep );
23662375 this .dumpSynonyms (outprefix + "synonyms.tsv" , sep );
23672376 this .dumpHidden (outprefix + "hidden.tsv" );
2377+ this .dumpConflicts (outprefix + "conflicts.tsv" );
23682378 }
23692379
23702380 // Overrides method in Taxonomy class
@@ -2506,6 +2516,52 @@ void logAndReport(Answer answer) {
25062516 answer .x .report (answer .reason , answer .y , answer .witness );
25072517 }
25082518
2519+ // 3799 conflicts as of 2014-04-12
2520+ // unode.comapped.parent == fromparent
2521+ void reportConflict (Taxon unode , Taxon fromparent ) {
2522+ conflicts .add (new Conflict (unode , fromparent ));
2523+ }
2524+
2525+ List <Conflict > conflicts = new ArrayList <Conflict >();
2526+ void dumpConflicts (String filename ) throws IOException {
2527+ PrintStream out = Taxonomy .openw (filename );
2528+ for (Conflict conflict : this .conflicts )
2529+ if (!conflict .unode .isHidden ())
2530+ out .println (conflict .toString ());
2531+ out .close ();
2532+ }
2533+ }
2534+
2535+ class Conflict {
2536+ Taxon unode ; // in source taxonomy
2537+ Taxon fromParent ; // in union taxonomy
2538+ Conflict (Taxon unode , Taxon fromParent ) {
2539+ this .unode = unode ; this .fromParent = fromParent ;
2540+ }
2541+ public String toString () {
2542+ // cf. Taxon.mrca
2543+ Taxon b = fromParent ;
2544+ while (b != null && b .mapped == null )
2545+ b = b .parent ;
2546+ b = b .mapped ;
2547+ Taxon a = unode ;
2548+ int da = a .measureDepth ();
2549+ int db = b .measureDepth ();
2550+ while (db > da ) {
2551+ b = b .parent ;
2552+ --db ;
2553+ }
2554+ while (da > db ) {
2555+ a = a .parent ;
2556+ --da ;
2557+ }
2558+ while (a != null && a .parent != b .parent ) {
2559+ a = a .parent ;
2560+ b = b .parent ;
2561+ --da ;
2562+ }
2563+ return (da + " " + fromParent + " in " + b + " lost child " + unode .comapped + " to " + unode .parent + " in " + a );
2564+ }
25092565}
25102566
25112567// For each source node, consider all possible union nodes it might map to
0 commit comments