Skip to content

Commit 4bcd99b

Browse files
committed
refactor: ♻️ simplify and refactor metakg parsing logic
1 parent 8f06ce7 commit 4bcd99b

File tree

4 files changed

+88
-138
lines changed

4 files changed

+88
-138
lines changed

src/handlers/api.py

Lines changed: 50 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,10 @@
1616
from controller import SmartAPI
1717
from controller.exceptions import ControllerError, NotFoundError
1818
from pipeline import MetaKGQueryPipeline
19-
from utils.decoder import to_dict
2019
from utils.downloader import DownloadError, download_async
2120
from utils.metakg.biolink_helpers import get_expanded_values
2221
from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter
2322
from utils.metakg.export import edges2graphml
24-
from utils.metakg.metakg_errors import MetadataRetrievalError
2523
from utils.metakg.parser import MetaKGParser
2624
from utils.metakg.path_finder import MetaKGPathFinder
2725
from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage
@@ -751,19 +749,17 @@ class MetaKGParserHandler(BaseHandler, MetaKGHandlerMixin):
751749
"""
752750

753751
kwargs = {
752+
"*": {
753+
"api_details": {"type": bool, "default": False},
754+
"bte": {"type": bool, "default": False},
755+
},
754756
"GET": {
755757
"url": {
756758
"type": str,
757759
"required": True,
758760
"max": 1000,
759761
"description": "URL of the SmartAPI metadata to parse"
760762
},
761-
"api_details": {"type": bool, "default": False},
762-
"bte": {"type": bool, "default": False},
763-
},
764-
"POST": {
765-
"api_details": {"type": bool, "default": False},
766-
"bte": {"type": bool, "default": False},
767763
},
768764
}
769765

@@ -789,95 +785,69 @@ def process_apis(self, apis):
789785

790786
async def get(self, *args, **kwargs):
791787
url = self.args.url
792-
if not url:
793-
raise HTTPError(400, reason="A url value is expected for the request, please provide a url.")
794-
795-
# Set initial args and handle potential errors in query parameters
796788
parser = MetaKGParser()
797789

798790
try:
799-
trapi_data = parser.get_TRAPI_metadatas(data=None, url=url)
800-
except MetadataRetrievalError as retrieve_err:
801-
raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message)
802-
except DownloadError:
803-
raise HTTPError(400, reason="There was an error downloading the data from the given input.")
804-
805-
# Get non-TRAPI metadata
806-
try:
807-
nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url)
808-
except MetadataRetrievalError as retrieve_err:
809-
raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message)
791+
parsed_metakg = parser.get_metakg(url=url)
810792
except DownloadError:
811-
raise HTTPError(400, reason="There was an error downloading the data from the given input.")
793+
self.write_error(400, reason="There was an error downloading the data from the given url.")
794+
except (ValueError, TypeError) as err:
795+
self.write_error(
796+
status_code=400,
797+
reason="The data retrieved from the given url is not a valid JSON or YAML object.",
798+
message=str(err)
799+
)
812800

813801
# Apply filtering -- if data found
814-
combined_data = trapi_data + nontrapi_data
815-
if combined_data:
816-
for i, api_dict in enumerate(combined_data):
817-
combined_data[i] = self.get_filtered_api(api_dict)
802+
if parsed_metakg:
803+
for i, api_dict in enumerate(parsed_metakg):
804+
parsed_metakg[i] = self.get_filtered_api(api_dict)
818805

819806
# Add url to metadata if api_details is set to 1
820807
if self.args.api_details:
821-
for data_dict in combined_data:
808+
for data_dict in parsed_metakg:
822809
if "metadata" in data_dict["api"]["smartapi"] and data_dict["api"]["smartapi"]["metadata"] is None:
823810
data_dict["api"]["smartapi"]["metadata"] = url
824811

825812
response = {
826-
"total": len(combined_data),
827-
"hits": combined_data,
813+
"total": len(parsed_metakg),
814+
"hits": parsed_metakg,
828815
}
829816

830817
self.finish(response)
831818

832819
async def post(self, *args, **kwargs):
833-
raw_body = self.request.body
834-
if not raw_body:
835-
raise HTTPError(400, reason="Request body cannot be empty.")
836-
837820
content_type = self.request.headers.get("Content-Type", "").lower()
821+
if content_type in ["application/json", "application/x-yaml"]:
822+
# if content type is set properly, it should have already been parsed
823+
metadata_from_body = self.args_json or self.args_yaml
824+
elif self.request.body:
825+
# if request body is provided but no proper content type is set
826+
# we will parse it as YAML anyway
827+
metadata_from_body = self._parse_yaml()
828+
else:
829+
metadata_from_body = None
830+
831+
if metadata_from_body:
832+
# Process the parsed metadata
833+
parser = MetaKGParser()
834+
parsed_metakg = parser.get_metakg(metadata_from_body)
835+
836+
# Apply filtering to the combined data
837+
if parsed_metakg:
838+
for i, api_dict in enumerate(parsed_metakg):
839+
parsed_metakg[i] = self.get_filtered_api(api_dict)
840+
841+
# Send the response back to the client
842+
response = {
843+
"total": len(parsed_metakg),
844+
"hits": parsed_metakg,
845+
}
838846

839-
# Try to parse the request body based on content type
840-
try:
841-
if content_type == "application/json":
842-
data = to_dict(raw_body, ctype="application/json")
843-
elif content_type == "application/x-yaml":
844-
data = to_dict(raw_body, ctype="application/x-yaml")
845-
else:
846-
# Default to YAML parsing if the content type is unknown or not specified
847-
data = to_dict(raw_body)
848-
except ValueError as val_err:
849-
if 'mapping values are not allowed here' in str(val_err):
850-
raise HTTPError(400, reason="Formatting issue, please consider using --data-binary to maintain YAML format.")
851-
else:
852-
raise HTTPError(400, reason="Invalid value, please provide a valid YAML object.")
853-
except TypeError:
854-
raise HTTPError(400, reason="Invalid type, provide valid type metadata.")
855-
856-
# Ensure the parsed data is a dictionary
857-
if not isinstance(data, dict):
858-
raise ValueError("Invalid input data type. Please provide a valid JSON/YAML object.")
859-
860-
# Process the parsed metadata
861-
parser = MetaKGParser()
862-
try:
863-
trapi_data = parser.get_TRAPI_metadatas(data=data)
864-
nontrapi_data = parser.get_non_TRAPI_metadatas(data=data)
865-
except MetadataRetrievalError as retrieve_err:
866-
raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message)
867-
except DownloadError:
868-
raise HTTPError(400, reason="Error downloading the data from the provided input.")
869-
870-
combined_data = trapi_data + nontrapi_data
871-
872-
# Apply filtering to the combined data
873-
if combined_data:
874-
for i, api_dict in enumerate(combined_data):
875-
combined_data[i] = self.get_filtered_api(api_dict)
876-
877-
# Send the response back to the client
878-
response = {
879-
"total": len(combined_data),
880-
"hits": combined_data,
881-
}
882-
883-
self.finish(response)
847+
self.finish(response)
848+
else:
849+
self.write_error(
850+
status_code=400,
851+
reason="Request body cannot be empty.",
852+
message="Please provide a valid JSON/YAML object in the request body."
853+
)

src/pipeline.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,10 @@
22
from enum import Enum
33
from typing import Dict, OrderedDict
44

5-
from biothings.web.query import (
6-
AsyncESQueryBackend,
7-
AsyncESQueryPipeline,
8-
ESQueryBuilder,
9-
ESResultFormatter,
10-
)
11-
from controller.base import OpenAPI, Swagger
5+
from biothings.web.query import AsyncESQueryBackend, AsyncESQueryPipeline, ESQueryBuilder, ESResultFormatter
126
from elasticsearch_dsl import Q, Search
7+
8+
from controller.base import OpenAPI, Swagger
139
from utils import decoder
1410

1511

@@ -219,8 +215,8 @@ def apply_extras(self, search, options):
219215
apply extra filters
220216
"""
221217
# if not options._source:
222-
# by default exclude api.bte or bte field, but can be included by specifying in the fields parameter
223-
# options._source = ["-api.bte", "-bte"]
218+
# by default exclude api.bte or bte field, but can be included by specifying in the fields parameter
219+
# options._source = ["-api.bte", "-bte"]
224220

225221
search = super().apply_extras(search, options)
226222
# apply extra filters from query parameters
@@ -262,6 +258,7 @@ def adjust_index(self, original_index: str, query: str, **options: Dict) -> str:
262258
query_index = self.indices.get("metakg", None)
263259
return query_index
264260

261+
265262
class MetaKGQueryPipeline(AsyncESQueryPipeline):
266263
def __init__(self, *args, **kwargs):
267264
# ns is an instance of BiothingsNamespace

src/utils/metakg/metakg_errors.py

Lines changed: 0 additions & 16 deletions
This file was deleted.

src/utils/metakg/parser.py

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import json
22
import logging
33
from copy import copy
4-
from utils.metakg.metakg_errors import MetadataRetrievalError
4+
from typing import Dict, List, Optional, Union
5+
56
import requests
67

78
from .api import API
@@ -13,46 +14,49 @@ class MetaKGParser:
1314
get_url_timeout = 60
1415
metakg_errors = None
1516

16-
def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None):
17+
def get_metakg(self,
18+
data: Optional[Union[Dict, API]] = None,
19+
extra_data: Optional[Dict] = None,
20+
url: Optional[str] = None) -> List[Dict]:
1721
"""
18-
Extract MetaKG edges from a SmartAPI document provided as `data` or fetched from a `url`.
19-
Raises an error if no valid input is given, or if parser fails to parse the document.
22+
Extract and process metadata from a SmartAPI document or URL.
23+
Returns MetaKG edges or propagates errors.
2024
"""
2125
if not data and not url:
22-
raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.")
26+
raise ValueError("Either data or url value is expected for this request, please provide data or a url.")
2327

28+
# if both data and url are provided, prefer data
2429
if data:
25-
parser = API(smartapi_doc=data)
30+
_api = data if isinstance(data, API) else API(data)
2631
elif url:
27-
parser = API(url=url)
32+
_api = API(url=url)
33+
34+
if _api.is_trapi:
35+
return self.get_TRAPI_metadatas(data=_api, extra_data=extra_data)
2836
else:
29-
raise MetadataRetrievalError(404, "No metadata available from provided data or url.")
37+
return self.get_non_TRAPI_metadatas(data=_api, extra_data=extra_data)
3038

31-
mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data)
39+
def get_non_TRAPI_metadatas(self, data: Union[Dict, API], extra_data: Optional[Dict] = None) -> List[Dict]:
40+
"""
41+
Extract MetaKG edges from a SmartAPI document provided as `data` or fetched from a `url`.
42+
Raises an error if no valid input is given, or if parser fails to parse the document.
43+
"""
44+
_api = data if isinstance(data, API) else API(data)
45+
mkg = self.extract_metakgedges(_api.metadata["operations"], extra_data=extra_data)
3246
no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg})
3347
no_edges = len({x["predicate"] for x in mkg})
3448
logger.info("Done [%s nodes, %s edges]", no_nodes, no_edges)
3549
return mkg
3650

37-
def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None):
51+
def get_TRAPI_metadatas(self, data: Union[Dict, API], extra_data: Optional[Dict] = None) -> List[Dict]:
3852
"""
3953
Extract and process TRAPI metadata from a SmartAPI document or URL.
4054
Returns MetaKG edges or propagates errors.
4155
"""
42-
if not data and not url:
43-
raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.")
44-
45-
try:
46-
if data:
47-
metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data)
48-
else:
49-
metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url)
50-
except MetadataRetrievalError:
51-
raise MetadataRetrievalError(404, "No metadata available from provided data or url.")
52-
56+
ops = []
57+
metadata_list = self.get_TRAPI_with_metakg_endpoint(data)
5358
count_metadata_list = len(metadata_list)
5459
self.metakg_errors = {}
55-
ops = []
5660

5761
for i, metadata in enumerate(metadata_list):
5862
ops.extend(self.get_ops_from_metakg_endpoint(metadata, f"[{i + 1}/{count_metadata_list}]"))
@@ -63,27 +67,22 @@ def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None):
6367

6468
return self.extract_metakgedges(ops, extra_data=extra_data)
6569

66-
def get_TRAPI_with_metakg_endpoint(self, data=None, url=None):
70+
def get_TRAPI_with_metakg_endpoint(self, data: Union[Dict, API]):
6771
"""
6872
Retrieve TRAPI metadata from a SmartAPI document or URL.
6973
Returns metadata if TRAPI endpoints are found, else an empty list.
7074
"""
71-
if not data and not url:
72-
raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.")
75+
metadatas = []
76+
_api = data if isinstance(data, API) else API(data)
7377

74-
# Initialize API with either data or URL
75-
parser = API(smartapi_doc=data) if data else API(url=url)
76-
77-
# Download the metadata
78-
metadata = parser.metadata
78+
metadata = _api.metadata
7979
_paths = metadata.get("paths", {})
8080
_team = metadata.get("x-translator", {}).get("team")
8181

8282
# Check for required TRAPI paths
8383
if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team:
84-
return [metadata]
85-
else:
86-
return []
84+
metadatas.append(metadata)
85+
return metadatas
8786

8887
def construct_query_url(self, server_url):
8988
if server_url.endswith("/"):

0 commit comments

Comments
 (0)