Merge pull request #224 from dataiku/feature/sc-82825-add-the-possibility-to-deploy-a-run-s-model

lpenet · web-flow · commit d06e3047adaa · 2022-03-21T17:16:45.000+01:00
Add a set_run_classes method to MLflow extension, to store the classe…
diff --git a/dataikuapi/dss/mlflow.py b/dataikuapi/dss/mlflow.py
@@ -1,3 +1,5 @@
+import json
+
 class DSSMLflowExtension(object):
     """
     A handle to interact with specific endpoints of the DSS MLflow integration.
@@ -131,3 +133,68 @@ def clean_experiment_tracking_db(self):
         This call requires an API key with admin rights
         """
         self.client._perform_raw("DELETE", "/api/2.0/mlflow/extension/clean-db/%s" % self.project_key)
+
+    def set_run_inference_info(self, run_id, model_type, classes=None, code_env_name=None, target=None):
+        """
+        Sets the type of the model, and optionally other information useful to deploy or evaluate it.
+
+        model_type must be one of:
+        - REGRESSION
+        - BINARY_CLASSIFICATION
+        - MULTICLASS
+        - OTHER
+
+        Classes must be specified if and only if the model is a BINARY_CLASSIFICATION or MULTICLASS model.
+
+        This information is leveraged to filter saved models on their prediction type and to prefill the classes
+        when using the GUI to deploy an MLflow model as a version of a DSS Saved Model.
+
+        :param model_type: prediction type (one of the values listed above)
+        :type model_type: str
+        :param run_id: id of the run for which to set the inference information
+        :type run_id: str
+        :param classes: ordered list of classes (only for BINARY_CLASSIFICATION and MULTICLASS, see above)
+        :type classes: list(str)
+        :param code_env_name: name of an adequate DSS python code environment
+        :type code_env_name: str
+        :param target: name of the target
+        :type target: str
+        """
+        if model_type not in {"REGRESSION", "BINARY_CLASSIFICATION", "MULTICLASS", "OTHER"}:
+            raise ValueError('Invalid prediction type: {}'.format(model_type))
+
+        if classes and model_type not in {"BINARY_CLASSIFICATION", "MULTICLASS"}:
+            raise ValueError('Classes can be specified only for BINARY_CLASSIFICATION or MULTICLASS prediction types')
+        if model_type in {"BINARY_CLASSIFICATION", "MULTICLASS"}:
+            if not classes:
+                raise ValueError('Classes must be specified for {} prediction type'.format(model_type))
+            if not isinstance(classes, list):
+                raise ValueError('Wrong type for classes: {}'.format(type(classes)))
+            for cur_class in classes:
+                if cur_class is None:
+                    raise ValueError('class can not be None')
+                if not isinstance(cur_class, str):
+                    raise ValueError('Wrong type for class {}: {}'.format(cur_class, type(cur_class)))
+
+        if code_env_name and not isinstance(code_env_name, str):
+            raise ValueError('code_env_name must be a string')
+        if target and not isinstance(target, str):
+            raise ValueError('target must be a string')
+
+        params = {
+            "run_id": run_id,
+            "prediction_type": model_type
+        }
+
+        if classes:
+            params["classes"] = json.dumps(classes)
+        if code_env_name:
+            params["code_env_name"] = code_env_name
+        if target:
+            params["target"] = target
+
+        self.client._perform_http(
+            "POST", "/api/2.0/mlflow/extension/set-run-inference-info",
+            headers={"x-dku-mlflow-project-key": self.project_key},
+            body=params
+        )