Merge branch 'scoring_services' of https://github.com/sassoftware/python-sasctl into scoring_services

samyarpotlapalli · samyarpotlapalli · commit b956ad6d9fa0 · 2024-08-19T09:14:27.000-04:00
diff --git a/src/sasctl/_services/score_definitions.py b/src/sasctl/_services/score_definitions.py
@@ -26,7 +26,7 @@ class ScoreDefinitions(Service):
 
     _SERVICE_ROOT = "/scoreDefinitions"
     _cas_management = CASManagement()
-    _model_respository = ModelRepository()
+    _model_repository = ModelRepository()
 
     (
         list_definitions,
@@ -39,7 +39,7 @@ class ScoreDefinitions(Service):
     def create_score_definition(
         cls,
         score_def_name: str,
-        model_id: str,
+        model: Union[str, dict],
         table_name: str,
         table_file: Union[str, Path] = None,
         description: str = "",
@@ -53,8 +53,8 @@ def create_score_definition(
         --------
         score_def_name: str
             Name of score definition.
-        model_id: str
-            A user-inputted model if where the model exists in a project.
+        model : str or dict
+            The name or id of the model, or a dictionary representation of the model.
         table_name: str
             A user-inputted table name in CAS Management.
         table_file: str or Path, optional
@@ -74,7 +74,8 @@ def create_score_definition(
 
         """
 
-        model = cls._model_respository.get_model(model_id)
+        model = cls._model_repository.get_model(model)
+        model_id = model.id
 
         if not model:
             raise HTTPError(
@@ -122,9 +123,9 @@ def create_score_definition(
             "name": score_def_name,
             "description": description,
             "objectDescriptor": {
-                "uri": f"/modelRepository/models/{model_id}",
+                "uri": f"/modelManagement/models/{model_id}",
                 "name": f"{model_name}({model_version})",
-                "type": "sas.models.model",
+                "type": "sas.models.model.python",
             },
             "inputData": {
                 "type": "CASTable",
diff --git a/src/sasctl/_services/score_execution.py b/src/sasctl/_services/score_execution.py
@@ -1,7 +1,14 @@
 import json
+import time
+import warnings
+from distutils.version import StrictVersion
+from typing import Union
 
+import pandas as pd
 from requests import HTTPError
 
+from .cas_management import CASManagement
+from ..core import current_session
 from .score_definitions import ScoreDefinitions
 from .service import Service
 
@@ -18,7 +25,9 @@ class ScoreExecution(Service):
     """
 
     _SERVICE_ROOT = "/scoreExecution"
+    _cas_management = CASManagement()
     _score_definitions = ScoreDefinitions()
+    _services = Service()
 
     (
         list_executions,
@@ -62,34 +71,17 @@ def create_score_execution(
         if not score_definition:
             raise HTTPError
         score_exec_name = score_definition.get("name")
-        model_uri = score_definition.get("objectDescriptor", "uri")
-        model_name = score_definition.get("objectDescriptor", "name")
-        model_input_library = score_definition.get("inputData", "libraryName")
-        model_table_name = score_definition.get("inputData", "tableName")
+        # The model URI must point at modelManagement, not modelRepository
+        model_uuid = score_definition.get("objectDescriptor").get("uri").split('/')[-1]
+        model_uri = f"/modelManagement/models/{model_uuid}"
+        model_name = score_definition.get("objectDescriptor").get("name")
+        model_input_library = score_definition.get("inputData").get("libraryName")
+        model_table_name = score_definition.get("inputData").get("tableName")
 
         # Defining a default output table name if none is provided
         if not output_table_name:
             output_table_name = f"{model_name}_{score_definition_id}"
 
-        # Getting all score executions that are using the inputted score_definition_id
-
-        # score_execution = cls.list_executions(
-        #     filter=f"eq(scoreDefinitionId, '{score_definition_id}')"
-        # )
-        score_execution = cls.get("scoreExecution/executions",
-            filter=f"filter=eq(scoreExecutionRequest.scoreDefinitionId,%{score_definition_id}%27)"
-        )
-        if not score_execution:
-            raise HTTPError(f"Something went wrong in the LIST_EXECUTIONS statement.")
-
-        # Checking the count of the execution list to see if there are any score executions for this score_definition_id already running
-        execution_count = score_execution.get("count")  # Exception catch location
-        if execution_count == 1:
-            execution_id = score_execution.get("items", 0, "id")
-            deleted_execution = cls.delete_execution(execution_id)
-            if deleted_execution.status_code >= 400:
-                raise HTTPError(f"Something went wrong in the DELETE statement.")
-
         headers_score_exec = {"Content-Type": "application/json"}
 
         create_score_exec = {
@@ -109,9 +101,124 @@ def create_score_execution(
         }
 
         # Creating the score execution
-        new_score_execution = cls.post(
-            "scoreExecution/executions",
+        score_execution = cls.post(
+            "executions",
             data=json.dumps(create_score_exec),
             headers=headers_score_exec,
         )
-        return new_score_execution
+
+        return score_execution
+
+    @classmethod
+    def poll_score_execution_state(
+        cls, score_execution: Union[dict, str], timeout: int = 300
+    ):
+        """Poll until the execution completes or fails, or `timeout` seconds pass.
+
+        Returns "completed", "failed", or "timeout"; accepts an execution id or dict.
+        """
+        if isinstance(score_execution, str):
+            exec_id = score_execution
+        else:
+            exec_id = score_execution.get("id")
+        start_poll = time.time()
+        while time.time() - start_poll < timeout:
+            score_execution_state = cls.get(f"executions/{exec_id}/state")
+            if score_execution_state == "completed":
+                print("Score execution state is 'completed'")
+                return "completed"
+            if score_execution_state == "failed":
+                print("The score execution state is failed.")  # TODO: return the logs
+                return "failed"
+            time.sleep(1)  # pause between polls instead of busy-looping on the service
+        print("The score execution is still running, but polling time ran out.")
+        return "timeout"
+
+    @classmethod
+    def get_score_execution_results(
+        cls,
+        score_execution: Union[dict, str],
+    ):
+        """Return the output table of a score execution as a pandas DataFrame.
+
+        `score_execution` is either an execution id, which is resolved with
+        `get_execution`, or an execution mapping holding an "outputTable" entry.
+        The table is read through the CAS "gateway" action set when swat can be
+        imported and the action set loads; otherwise the REST-based
+        `_no_gateway_get_results` fallback is used.
+        """
+        try:
+            import swat
+        except ImportError:
+            swat = None
+
+        if isinstance(score_execution, str):
+            score_execution = cls.get_execution(score_execution)
+
+        output_info = score_execution.get("outputTable")
+        server_name = output_info.get("serverName")
+        library_name = output_info.get("libraryName")
+        table_name = output_info.get("tableName")
+
+        # A CAS connection is only attempted when swat imported successfully.
+        cas = current_session().as_swat() if swat else None
+        if cas is None or not cas.loadActionSet("gateway"):
+            # Single fallback path for both "no swat" and "no gateway action set".
+            return cls._no_gateway_get_results(
+                server_name,
+                library_name,
+                table_name,
+            )
+
+        # Python source executed by the gateway action; it returns the table
+        # under the "Execution Results" key.
+        gateway_code = f"""
+import pandas as pd
+import numpy as np
+            
+table = gateway.read_table({{"caslib": "{library_name}", "name": "{table_name}"}})
+            
+gateway.return_table("Execution Results", df = table, label = "label", title = "title")"""
+
+        output_table = cas.gateway.runlang(
+            code=gateway_code,
+            single=True,
+            timeout_millis=10000,
+        )
+        return pd.DataFrame(output_table["Execution Results"])
+
+    @classmethod
+    def _no_gateway_get_results(cls, server_name, library_name, table_name):
+        """Fetch the output table through REST when the CAS gateway is unavailable.
+
+        Reads column names from casManagement and up to 10000 rows from
+        casRowSets, then assembles them into a pandas DataFrame.
+        """
+        try:  # json_normalize's import location depends on the pandas version
+            from pandas import json_normalize
+        except ImportError:
+            from pandas.io.json import json_normalize
+
+        warnings.warn(
+            "Without swat installed, the amount of rows from the output table that "
+            "can be collected are memory limited by the CAS worker."
+        )
+
+        output_columns = cls._cas_management.get(
+            f"servers/{server_name}/"
+            f"caslibs/{library_name}/"
+            f"tables/{table_name}/columns?limit=10000"
+        )
+        columns = json_normalize(output_columns.json(), "items")
+        column_names = columns["names"].to_list()
+
+        output_rows = cls._services.get(
+            f"casRowSets/servers/{server_name}/"
+            f"caslibs/{library_name}/"
+            f"tables/{table_name}/rows?limit=10000"
+        )
+        output_table = pd.DataFrame(
+            json_normalize(output_rows.json()["items"])["cells"].to_list(),
+            columns=column_names
+        )
+        return output_table
diff --git a/src/sasctl/services.py b/src/sasctl/services.py
@@ -21,6 +21,8 @@
 from ._services.report_images import ReportImages as report_images
 from ._services.reports import Reports as reports
 from ._services.saslogon import SASLogon as saslogon
+from ._services.score_definitions import ScoreDefinitions as score_definitions
+from ._services.score_execution import ScoreExecution as score_execution
 from ._services.sentiment_analysis import SentimentAnalysis as sentiment_analysis
 from ._services.text_categorization import TextCategorization as text_categorization
 from ._services.text_parsing import TextParsing as text_parsing
diff --git a/src/sasctl/tasks.py b/src/sasctl/tasks.py
@@ -13,6 +13,8 @@
 import pickle  # skipcq BAN-B301
 import re
 import sys
+from pathlib import Path
+from typing import Union
 from warnings import warn
 
 import pandas as pd
@@ -30,6 +32,8 @@
 from .services import model_management as mm
 from .services import model_publish as mp
 from .services import model_repository as mr
+from .services import score_definitions as sd
+from .services import score_execution as se
 from .utils.misc import installed_packages
 from .utils.pymas import from_pickle
 
@@ -1008,3 +1012,30 @@ def get_project_kpis(
     kpiTableDf = kpiTableDf.apply(lambda x: x.str.strip()).replace([".", ""], None)
 
     return kpiTableDf
+
+
+def score_model_with_cas(
+    score_def_name: str,
+    model: Union[str, dict],
+    table_name: str,
+    table_file: Union[str, Path] = None,
+    description: str = "",
+    server_name: str = "cas-shared-default",
+    library_name: str = "Public",
+    model_version: str = "latest",
+):
+    """Create a score definition for `model`, execute it, and return the results.
+
+    Arguments are forwarded unchanged to `sd.create_score_definition`; the value
+    returned is whatever `se.get_score_execution_results` produces.
+    """
+    score_definition = sd.create_score_definition(
+        score_def_name, model, table_name, table_file=table_file,
+        description=description, server_name=server_name,
+        library_name=library_name, model_version=model_version,
+    )
+    score_execution = se.create_score_execution(score_definition.id)
+    score_execution_poll = se.poll_score_execution_state(score_execution)
+    if score_execution_poll != "completed":  # failed, timed out, or unknown state
+        warn(f"Score execution state is {score_execution_poll!r}, not 'completed'.")
+    return se.get_score_execution_results(score_execution)