|
13 | 13 | import pickle |
14 | 14 | from abc import ABC |
15 | 15 | from collections import OrderedDict |
16 | | -from typing import Any, Dict, Generator, List, Optional, Tuple |
| 16 | +from typing import Any, Dict, Generator, List, Optional, Tuple, Union |
17 | 17 |
|
18 | 18 | import fastobo |
19 | 19 | import networkx as nx |
@@ -244,11 +244,16 @@ def _extract_class_hierarchy(self, data_path: str) -> nx.DiGraph: |
244 | 244 | with open(data_path, encoding="utf-8") as chebi: |
245 | 245 | chebi = "\n".join(l for l in chebi if not l.startswith("xref:")) |
246 | 246 |
|
247 | | - elements = [ |
248 | | - term_callback(clause) |
249 | | - for clause in fastobo.loads(chebi) |
250 | | - if clause and ":" in str(clause.id) |
251 | | - ] |
| 247 | + elements = [] |
| 248 | + for term_doc in fastobo.loads(chebi): |
| 249 | + if ( |
| 250 | + term_doc |
| 251 | + and isinstance(term_doc.id, fastobo.id.PrefixedIdent) |
| 252 | + and term_doc.id.prefix == "CHEBI" |
| 253 | + ): |
| 254 | + term_dict = term_callback(term_doc) |
| 255 | + if term_dict: |
| 256 | + elements.append(term_dict) |
252 | 257 |
|
253 | 258 | g = nx.DiGraph() |
254 | 259 | for n in elements: |
@@ -818,7 +823,7 @@ def chebi_to_int(s: str) -> int: |
818 | 823 | return int(s[s.index(":") + 1 :]) |
819 | 824 |
|
820 | 825 |
|
821 | | -def term_callback(doc) -> dict: |
| 826 | +def term_callback(doc: fastobo.term.TermFrame) -> Union[Dict, bool]: |
822 | 827 | """ |
823 | 828 | Extracts information from a ChEBI term document. |
824 | 829 | This function takes a ChEBI term document as input and extracts relevant information such as the term ID, parents, |
@@ -858,6 +863,12 @@ def term_callback(doc) -> dict: |
858 | 863 | parents.append(chebi_to_int(str(clause.term))) |
859 | 864 | elif isinstance(clause, fastobo.term.NameClause): |
860 | 865 | name = str(clause.name) |
| 866 | + |
| 867 | + if isinstance(clause, fastobo.term.IsObsoleteClause): |
| 868 | + if clause.obsolete: |
| 869 | + # Skip this term document when its is_obsolete clause is set to true. |
| 870 | + return False |
| 871 | + |
861 | 872 | return { |
862 | 873 | "id": chebi_to_int(str(doc.id)), |
863 | 874 | "parents": parents, |
|
0 commit comments