Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions kg_microbe/transform_utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@
UNIPROT_DISEASE_COLUMN_NAME = "Involvement in disease"
UNIPROT_GENE_PRIMARY_COLUMN_NAME = "Gene Names (primary)"
UNIPROT_PREFIX = "UniprotKB:"
TREMBL_PREFIX = "TrEMBL:"
CHEMICAL_TO_PROTEIN_EDGE = "biolink:binds"
# PROTEIN_TO_GO_EDGE = "biolink:enables"
PROTEIN_TO_ORGANISM_EDGE = "biolink:derives_from"
Expand Down
8 changes: 5 additions & 3 deletions kg_microbe/transform_utils/ontologies/ontologies_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
UNIPATHWAYS_REACTION_PREFIX,
UNIPATHWAYS_XREFS_FILEPATH,
UNIPROT_PREFIX,
TREMBL_PREFIX,
XREF_COLUMN,
)
from kg_microbe.utils.ontology_utils import replace_category_ontology
Expand Down Expand Up @@ -472,9 +473,10 @@ def _fix_ec_iri(line, iri_index):
line = _replace_special_prefixes(line)
new_ef_lines.append(line)
if name == "ec":
# Remove Uniprot nodes since accounted for elsewhere
new_nf_lines = [line for line in new_nf_lines if UNIPROT_PREFIX not in line]
new_ef_lines = [line for line in new_ef_lines if UNIPROT_PREFIX not in line]
# Remove UniProt and TrEMBL nodes since they are accounted for elsewhere
protein_prefixes = [UNIPROT_PREFIX, TREMBL_PREFIX]
new_nf_lines = [line for line in new_nf_lines if not any(prefix in line for prefix in protein_prefixes)]
new_ef_lines = [line for line in new_ef_lines if not any(prefix in line for prefix in protein_prefixes)]
# elif name == "rhea":
# # Remove debio nodes that account for direction, since already there in inverse triples
# # Note that CHEBI and EC predicates do not match Rhea pyobo, so removing them
Expand Down
2 changes: 1 addition & 1 deletion kg_microbe/transform_utils/rhea_mappings/rhea_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def run(self, data_file: Union[Optional[Path], Optional[str]] = None, show_statu
all_terms_writer.writerow(
[*subject_info, *predicate_info, *object_info]
)
# Filter out TrEMBL/UniProt entries - they should not be added
# Only include CHEBI, EC, and GO entries (filter out UniProt and TrEMBL entries)
if any(
object_info[0].startswith(prefix)
for prefix in [
Expand Down
Loading