Skip to content

Commit d513978

Browse files
committed
[FIX] ensure orthologGroup at the MRCA of all genes
This was not always the case, e.g. when two subHOGs that came from the same subclade of the taxonomy were only put together at a later stage of the species tree traversal.
1 parent af91dfb commit d513978

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

FastOMA/_hog_class.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,17 @@ def _sorter_key(sh):
337337
elif len(element_list) > 1:
338338
#hog_elemnt = ET.Element('orthologGroup', attrib={"id": str(self._hogid)})
339339
hog_elemnt = ET.Element('orthologGroup', attrib={"id": str(self._hogid)}, )
340-
num_species_tax_hog = len(set([i.split("||")[1] for i in self._members])) # 'tr|H2MU14|H2MU14_ORYLA||ORYLA||1056022282'
341-
completeness_score = round(num_species_tax_hog/self._tax_now.size, 4)
340+
species_of_members = set([i.split("||")[1] for i in self._members]) # 'tr|H2MU14|H2MU14_ORYLA||ORYLA||1056022282'
341+
num_species_tax_hog = len(species_of_members)
342+
mrca = self.taxlevel.get_common_ancestor(
343+
*[self.taxlevel.search_nodes(name=x)[0] for x in species_of_members])
344+
if mrca != self.taxlevel:
345+
logger.info(f"mrca ({mrca.name}) != self.taxlevel ({self.taxlevel.name})")
346+
logger.info(f"<{hog_elemnt.tag} {hog_elemnt.attrib}>")
347+
348+
completeness_score = round(num_species_tax_hog/mrca.size, 4)
342349
property_element = ET.SubElement(hog_elemnt, "score", attrib={"id": "CompletenessScore", "value": str(completeness_score)})
343-
property_element = ET.SubElement(hog_elemnt, "property", attrib={"name": "TaxRange", "value": str(self._tax_now.name)})
350+
property_element = ET.SubElement(hog_elemnt, "property", attrib={"name": "TaxRange", "value": str(mrca.name)})
344351

345352
for element in element_list:
346353
hog_elemnt.append(element)

0 commit comments

Comments
 (0)