Skip to content

Commit b62c931

Browse files
committed
update term_callback to handle obsolete terms
1 parent a95415b commit b62c931

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

chebai/preprocessing/datasets/chebi.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import pickle
1414
from abc import ABC
1515
from collections import OrderedDict
16-
from typing import Any, Dict, Generator, List, Optional, Tuple
16+
from typing import Any, Dict, Generator, List, Optional, Tuple, Union
1717

1818
import fastobo
1919
import networkx as nx
@@ -244,11 +244,16 @@ def _extract_class_hierarchy(self, data_path: str) -> nx.DiGraph:
244244
with open(data_path, encoding="utf-8") as chebi:
245245
chebi = "\n".join(l for l in chebi if not l.startswith("xref:"))
246246

247-
elements = [
248-
term_callback(clause)
249-
for clause in fastobo.loads(chebi)
250-
if clause and ":" in str(clause.id)
251-
]
247+
elements = []
248+
for term_doc in fastobo.loads(chebi):
249+
if (
250+
term_doc
251+
and isinstance(term_doc.id, fastobo.id.PrefixedIdent)
252+
and term_doc.id.prefix == "CHEBI"
253+
):
254+
term_dict = term_callback(term_doc)
255+
if term_dict:
256+
elements.append(term_dict)
252257

253258
g = nx.DiGraph()
254259
for n in elements:
@@ -818,7 +823,7 @@ def chebi_to_int(s: str) -> int:
818823
return int(s[s.index(":") + 1 :])
819824

820825

821-
def term_callback(doc) -> dict:
826+
def term_callback(doc: fastobo.term.TermFrame) -> Union[Dict, bool]:
822827
"""
823828
Extracts information from a ChEBI term document.
824829
This function takes a ChEBI term document as input and extracts relevant information such as the term ID, parents,
@@ -858,6 +863,12 @@ def term_callback(doc) -> dict:
858863
parents.append(chebi_to_int(str(clause.term)))
859864
elif isinstance(clause, fastobo.term.NameClause):
860865
name = str(clause.name)
866+
867+
if isinstance(clause, fastobo.term.IsObsoleteClause):
868+
if clause.obsolete:
869+
# If the term document has an obsolete clause set to true, skip this document.
870+
return False
871+
861872
return {
862873
"id": chebi_to_int(str(doc.id)),
863874
"parents": parents,

0 commit comments

Comments
 (0)