
Commit 295b423

christinaexyou and Christina Xu authored:
add methods to execute and return queries from trustyai service (#202)

Co-authored-by: Christina Xu <[email protected]>
1 parent e40dc58 · commit 295b423

File tree: 5 files changed, +334 -0 lines changed

.github/workflows/workflow.yml
pyproject.toml
src/trustyai/utils/api/api.py
src/trustyai/utils/extras/metrics_service.py
tests/extras/test_metrics_service.py

.github/workflows/workflow.yml

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ jobs:
           pip install .
           pip install ".[dev]"
           pip install ".[extras]"
+          pip install ".[api]"
       - name: Lint
         run: |
           pylint --ignore-imports=yes $(find src/trustyai -type f -name "*.py")

pyproject.toml

Lines changed: 4 additions & 0 deletions
@@ -63,6 +63,10 @@ detoxify = [
     "trl"
 ]
 
+api = [
+    "kubernetes"
+]
+
 [project.urls]
 homepage = "https://github.com/trustyai-explainability/trustyai-explainability-python"
 documentation = "https://trustyai-explainability-python.readthedocs.io/en/latest/"
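
The new "api" extra only declares the Kubernetes Python client as an optional dependency. Below is a minimal sketch, not part of this commit (the flag name is illustrative), of how calling code might check that the extra is installed before touching the Kubernetes-backed helpers:

# Probe for the optional dependency pulled in by: pip install "trustyai[api]"
try:
    import kubernetes  # pylint: disable=unused-import
    HAVE_API_EXTRA = True
except ImportError:
    HAVE_API_EXTRA = False

print("trustyai[api] dependencies available:", HAVE_API_EXTRA)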

src/trustyai/utils/api/api.py

Lines changed: 45 additions & 0 deletions
"""
Server module
"""

# pylint: disable = import-error, too-few-public-methods, assignment-from-no-return
__SUCCESSFUL_IMPORT = True

try:
    from kubernetes import config, dynamic
    from kubernetes.dynamic.exceptions import ResourceNotFoundError
    from kubernetes.client import api_client

except ImportError:
    print(
        "Warning: api dependencies not found. "
        "Dependencies can be installed with 'pip install trustyai[api]'"
    )
    __SUCCESSFUL_IMPORT = False

if __SUCCESSFUL_IMPORT:

    class TrustyAIApi:
        """
        Gets TrustyAI service information
        """

        def __init__(self):
            # Prefer in-cluster configuration; fall back to the local kubeconfig.
            try:
                k8s_client = config.load_incluster_config()
            except config.ConfigException:
                k8s_client = config.load_kube_config()
            self.dyn_client = dynamic.DynamicClient(
                api_client.ApiClient(configuration=k8s_client)
            )

        def get_service_route(self, name: str, namespace: str):
            """
            Gets the route for a service under a specified namespace
            """
            route_api = self.dyn_client.resources.get(api_version="v1", kind="Route")
            try:
                service = route_api.get(name=name, namespace=namespace)
                return f"https://{service.spec.host}"
            except ResourceNotFoundError:
                return f"Error accessing service {name} in namespace {namespace}."
src/trustyai/utils/extras/metrics_service.py

Lines changed: 208 additions & 0 deletions

"""Python client for TrustyAI metrics"""

from typing import List
import json
import datetime as dt
import pandas as pd
import requests

from trustyai.utils.api.api import TrustyAIApi


def json_to_df(data_path: str, batch_list: List[int]) -> pd.DataFrame:
    """
    Converts batched data in JSON files to a single pandas DataFrame
    """
    final_df = pd.DataFrame()
    for batch in batch_list:
        file = data_path + f"{batch}.json"
        with open(file, encoding="utf8") as train_file:
            batch_data = json.load(train_file)["inputs"][0]
        batch_df = pd.DataFrame.from_dict(batch_data["data"]).T
        final_df = pd.concat([final_df, batch_df])
    return final_df


def df_to_json(final_df: pd.DataFrame, name: str, json_file: str) -> None:
    """
    Converts a pandas DataFrame to a JSON file
    """
    inputs = [
        {
            "name": name,
            "shape": list(final_df.shape),
            "datatype": "FP64",
            "data": final_df.values.tolist(),
        }
    ]
    data_dict = {"inputs": inputs}
    with open(json_file, "w", encoding="utf8") as outfile:
        json.dump(data_dict, outfile)


class TrustyAIMetricsService:
    """
    Executes and returns queries from the TrustyAI service on ODH
    """

    def __init__(self, token: str, namespace: str, verify=True):
        """
        :param token: OpenShift login token
        :param namespace: model namespace
        :param verify: enable SSL verification for requests
        """
        self.token = token
        self.namespace = namespace
        self.trusty_url = TrustyAIApi().get_service_route(
            name="trustyai-service", namespace=self.namespace
        )
        self.thanos_url = TrustyAIApi().get_service_route(
            name="thanos-querier", namespace="openshift-monitoring"
        )
        self.headers = {
            "Authorization": "Bearer " + token,
            "Content-Type": "application/json",
        }
        self.verify = verify

    def upload_payload_data(self, json_file: str, timeout=5) -> None:
        """
        Uploads data to the TrustyAI service
        """
        with open(json_file, "r", encoding="utf8") as file:
            response = requests.post(
                f"{self.trusty_url}/data/upload",
                data=file,
                headers=self.headers,
                verify=self.verify,
                timeout=timeout,
            )
        if response.status_code == 200:
            print("Data successfully uploaded to TrustyAI service")
        else:
            print(f"Error {response.status_code}: {response.reason}")

    def get_model_metadata(self, timeout=5):
        """
        Retrieves model metadata from the TrustyAI service
        """
        response = requests.get(
            f"{self.trusty_url}/info",
            headers=self.headers,
            verify=self.verify,
            timeout=timeout,
        )
        if response.status_code == 200:
            model_metadata = json.loads(response.text)
            return model_metadata
        raise RuntimeError(f"Error {response.status_code}: {response.reason}")

    def label_data_fields(self, payload: dict, timeout=5):
        """
        Assigns feature names to model input data
        """

        def print_name_mapping(self):
            response = requests.get(
                f"{self.trusty_url}/info",
                headers=self.headers,
                verify=self.verify,
                timeout=timeout,
            )
            name_mapping = json.loads(response.text)[0]
            for key, val in name_mapping["data"]["inputSchema"]["nameMapping"].items():
                print(f"{key} -> {val}")

        response = requests.get(
            f"{self.trusty_url}/info",
            headers=self.headers,
            verify=self.verify,
            timeout=timeout,
        )
        input_data_fields = list(
            json.loads(response.text)[0]["data"]["inputSchema"]["items"].keys()
        )
        input_mapping_keys = list(payload["inputMapping"].keys())
        if len(list(set(input_mapping_keys) - set(input_data_fields))) == 0:
            response = requests.post(
                f"{self.trusty_url}/info/names",
                json=payload,
                headers=self.headers,
                verify=self.verify,
                timeout=timeout,
            )
            if response.status_code == 200:
                print_name_mapping(self)
                return response.text
            print(f"Error {response.status_code}: {response.reason}")
        raise ValueError("Field does not exist")

    def get_metric_request(
        self, payload: dict, metric: str, reoccuring: bool, timeout=5
    ):
        """
        Retrieve or schedule a metric request
        """
        if reoccuring:
            response = requests.post(
                f"{self.trusty_url}/metrics/{metric}/request",
                json=payload,
                headers=self.headers,
                verify=self.verify,
                timeout=timeout,
            )
        else:
            response = requests.post(
                f"{self.trusty_url}/metrics/{metric}",
                json=payload,
                headers=self.headers,
                verify=self.verify,
                timeout=timeout,
            )
        if response.status_code == 200:
            return response.text
        raise RuntimeError(f"Error {response.status_code}: {response.reason}")

    def upload_data_to_model(self, model_name: str, json_file: str, timeout=5):
        """
        Sends an inference request to the model
        """
        # get_service_route already returns an https:// URL
        model_route = TrustyAIApi().get_service_route(
            name=model_name, namespace=self.namespace
        )
        with open(json_file, encoding="utf8") as batch_file:
            response = requests.post(
                url=f"{model_route}/infer",
                data=batch_file,
                headers=self.headers,
                verify=self.verify,
                timeout=timeout,
            )
        if response.status_code == 200:
            return response.text
        raise RuntimeError(f"Error {response.status_code}: {response.reason}")

    def get_metric_data(
        self, namespace: str, metric: str, time_interval: str, timeout=5
    ):
        """
        Retrieves metric data for a specified time range
        """
        params = {"query": f"{metric}{{namespace='{namespace}'}}{time_interval}"}
        response = requests.get(
            f"{self.thanos_url}/api/v1/query?",
            params=params,
            headers=self.headers,
            verify=self.verify,
            timeout=timeout,
        )
        if response.status_code == 200:
            data_dict = json.loads(response.text)["data"]["result"][0]["values"]
            metric_df = pd.DataFrame(data_dict, columns=["timestamp", metric])
            metric_df["timestamp"] = metric_df["timestamp"].apply(
                lambda epoch: dt.datetime.fromtimestamp(epoch).strftime(
                    "%Y-%m-%d %H:%M:%S"
                )
            )
            return metric_df
        raise RuntimeError(f"Error {response.status_code}: {response.reason}")

tests/extras/test_metrics_service.py

Lines changed: 76 additions & 0 deletions
"""Test suite for TrustyAI metrics service data conversions"""
import json
import os
import random
import unittest
import numpy as np
import pandas as pd

from trustyai.utils.extras.metrics_service import (
    json_to_df,
    df_to_json,
)


def generate_json_data(batch_list, data_path):
    """Writes one randomly generated, single-row inference payload per batch"""
    for batch in batch_list:
        data = {
            "inputs": [
                {
                    "name": "test_data_input",
                    "shape": [1, 100],
                    "datatype": "FP64",
                    "data": [random.uniform(a=100, b=200) for _ in range(100)],
                }
            ]
        }
        with open(data_path + f"{batch}.json", "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)


def generate_test_df():
    """Builds a 100x5 DataFrame of random values"""
    data = {
        "0": np.random.uniform(low=100, high=200, size=100),
        "1": np.random.uniform(low=5000, high=10000, size=100),
        "2": np.random.uniform(low=100, high=200, size=100),
        "3": np.random.uniform(low=5000, high=10000, size=100),
        "4": np.random.uniform(low=5000, high=10000, size=100),
    }
    return pd.DataFrame(data=data)


class TestMetricsService(unittest.TestCase):
    def setUp(self):
        self.df = generate_test_df()
        self.data_path = "data/"
        if not os.path.exists(self.data_path):
            os.mkdir("data/")
        self.batch_list = list(range(0, 5))

    def test_json_to_df(self):
        """Test json data to pandas dataframe conversion"""
        generate_json_data(batch_list=self.batch_list, data_path=self.data_path)
        df = json_to_df(self.data_path, self.batch_list)
        n_rows, n_cols = 0, 0
        for batch in self.batch_list:
            file = self.data_path + f"{batch}.json"
            with open(file, encoding="utf8") as f:
                data = json.load(f)["inputs"][0]
            n_rows += data["shape"][0]
            n_cols = data["shape"][1]
        self.assertEqual(df.shape, (n_rows, n_cols))

    def test_df_to_json(self):
        """Test pandas dataframe to json data conversion"""
        df = generate_test_df()
        name = "test_data_input"
        json_file = "data/test.json"
        df_to_json(df, name, json_file)
        with open(json_file, encoding="utf8") as f:
            data = json.load(f)["inputs"][0]
        n_rows = data["shape"][0]
        n_cols = data["shape"][1]
        self.assertEqual(df.shape, (n_rows, n_cols))


if __name__ == "__main__":
    unittest.main()
