feat: first draft for polling score execution and returning results

smlindauer · smlindauer · commit 4c8559ce28d7 · 2024-08-14T12:18:15.000-04:00
diff --git a/src/sasctl/_services/score_execution.py b/src/sasctl/_services/score_execution.py
@@ -1,7 +1,14 @@
 import json
+import time
+import warnings
+from distutils.version import StrictVersion
+from typing import Union
 
+import pandas as pd
 from requests import HTTPError
 
+from .cas_management import CASManagement
+from ..core import current_session
 from .score_definitions import ScoreDefinitions
 from .service import Service
 
@@ -18,7 +25,9 @@ class ScoreExecution(Service):
     """
 
     _SERVICE_ROOT = "/scoreExecution"
+    _cas_management = CASManagement()
     _score_definitions = ScoreDefinitions()
+    _services = Service()
 
     (
         list_executions,
@@ -62,30 +71,32 @@ def create_score_execution(
         if not score_definition:
             raise HTTPError
         score_exec_name = score_definition.get("name")
-        model_uri = score_definition.get("objectDescriptor", "uri")
-        model_name = score_definition.get("objectDescriptor", "name")
-        model_input_library = score_definition.get("inputData", "libraryName")
-        model_table_name = score_definition.get("inputData", "tableName")
+        # The execution needs a /modelManagement URI, so rebuild it from the model UUID
+        model_uuid = score_definition.get("objectDescriptor").get("uri").split('/')[-1]
+        model_uri = f"/modelManagement/models/{model_uuid}"
+        model_name = score_definition.get("objectDescriptor").get("name")
+        model_input_library = score_definition.get("inputData").get("libraryName")
+        model_table_name = score_definition.get("inputData").get("tableName")
 
         # Defining a default output table name if none is provided
         if not output_table_name:
             output_table_name = f"{model_name}_{score_definition_id}"
 
         # Getting all score executions that are using the inputted score_definition_id
 
-        score_execution = cls.list_executions(
-            filter=f"eq(scoreDefinitionId, '{score_definition_id}')"
-        )
-        if not score_execution:
-            raise HTTPError(f"Something went wrong in the LIST_EXECUTIONS statement.")
-
-        # Checking the count of the execution list to see if there are any score executions for this score_definition_id already running
-        execution_count = score_execution.get("count")  # Exception catch location
-        if execution_count == 1:
-            execution_id = score_execution.get("items", 0, "id")
-            deleted_execution = cls.delete_execution(execution_id)
-            if deleted_execution.status_code >= 400:
-                raise HTTPError(f"Something went wrong in the DELETE statement.")
+        # score_execution = cls.list_executions(
+        #     filter=f"eq(scoreDefinitionId, '{score_definition_id}')"
+        # )
+        # if not score_execution:
+        #     raise HTTPError(f"Something went wrong in the LIST_EXECUTIONS statement.")
+        #
+        # # Checking the count of the execution list to see if there are any score executions for this score_definition_id already running
+        # execution_count = score_execution.get("count")  # Exception catch location
+        # if execution_count == 1:
+        #     execution_id = score_execution.get("items", 0, "id")
+        #     deleted_execution = cls.delete_execution(execution_id)
+        #     if deleted_execution.status_code >= 400:
+        #         raise HTTPError(f"Something went wrong in the DELETE statement.")
 
         headers_score_exec = {"Content-Type": "application/json"}
 
@@ -106,9 +117,108 @@ def create_score_execution(
         }
 
         # Creating the score execution
-        new_score_execution = cls.post(
-            "scoreExecution/executions",
+        score_execution = cls.post(
+            "executions",
             data=json.dumps(create_score_exec),
             headers=headers_score_exec,
         )
-        return new_score_execution
+
+        return score_execution
+
+    @classmethod
+    def poll_score_execution_state(
+        cls, score_execution: Union[dict, str], timeout: int = 300
+    ):
+        """Poll a score execution until it completes, fails, or times out.
+
+        Takes an execution id (or a dict with an "id" key) and a `timeout` in
+        seconds; returns "complete", "failed", or "timeout".
+        """
+        is_id = isinstance(score_execution, str)
+        exec_id = score_execution if is_id else score_execution.get("id")
+        deadline = time.time() + timeout
+        while time.time() < deadline:
+            state = cls.get(f"executions/{exec_id}/state").text
+            if state == "complete":
+                print("Score execution state is 'complete'")
+                return state
+            if state == "failed":
+                # TODO: Grab score execution logs and return those
+                print("The score execution state is failed.")
+                return state
+            time.sleep(1)  # wait between requests instead of busy-polling
+        print("The score execution is still running, but polling time ran out.")
+        return "timeout"
+
+    @classmethod
+    def get_score_execution_results(
+        cls,
+        score_execution: Union[dict, str],
+    ):
+        """Fetch the output table written by a score execution.
+
+        Parameters
+        ----------
+        score_execution : dict or str
+            A score execution dict, or an execution id that is looked up with
+            `get_execution`.
+
+        Returns
+        -------
+        A pandas.DataFrame built from REST calls (requested with limit=10000)
+        when swat is not installed; otherwise the `gateway.runlang` result.
+        """
+        try:
+            import swat
+        except ImportError:
+            swat = None
+
+        if isinstance(score_execution, str):
+            score_execution = cls.get_execution(score_execution)
+
+        output_info = score_execution.get("outputTable")
+        server_name = output_info.get("serverName")
+        library_name = output_info.get("libraryName")
+        table_name = output_info.get("tableName")
+
+        if swat:
+            cas = current_session().as_swat()
+            cas.loadActionSet("gateway")
+            # Built at column 0 with quoted names and a closed call so the code
+            # sent to the gateway is valid Python.
+            gateway_code = (
+                "import pandas as pd\n"
+                "import numpy as np\n"
+                "table = gateway.read_table("
+                f'{{"caslib": "{library_name}", "name": "{table_name}"}})\n'
+                'gateway.return_table("Execution Results", df=table, '
+                'label="label", title="title")\n'
+            )
+            return cas.gateway.runlang(
+                code=gateway_code, single=True, timeout_millis=10000
+            )
+
+        # Fall back to the REST APIs when swat is unavailable.
+        try:
+            from pandas import json_normalize
+        except ImportError:  # older pandas only ships it under pandas.io.json
+            from pandas.io.json import json_normalize
+
+        warnings.warn(
+            "Without swat installed, the amount of rows from the output table that "
+            "can be collected are memory limited by the CAS worker."
+        )
+        table_path = (
+            f"servers/{server_name}/caslibs/{library_name}/tables/{table_name}"
+        )
+        output_columns = cls._cas_management.get(f"{table_path}/columns?limit=10000")
+        columns = json_normalize(output_columns.json(), "items")
+        # NOTE(review): assumes each column item has a "names" key -- confirm.
+        column_names = columns["names"].to_list()
+
+        # Path segments need "/" separators; they were previously run together.
+        output_rows = cls._services.get(f"casRowSets/{table_path}/rows?limit=10000")
+        return pd.DataFrame(
+            json_normalize(output_rows.json()["items"])["cells"].to_list(),
+            columns=column_names,
+        )