Skip to content

Commit 0165dd0

Browse files
authored
Merge pull request #202 from dataiku/feature/mlflow-experiment-tracking
MLflow experiment tracking client API
2 parents f4a66f5 + 324f25e commit 0165dd0

File tree

9 files changed

+643
-41
lines changed

9 files changed

+643
-41
lines changed

dataikuapi/dss/mlflow.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
import json


class DSSMLflowExtension(object):
    """
    A handle to interact with specific endpoints of the DSS MLflow integration.

    Do not create this directly, use :meth:`dataikuapi.dss.DSSProject.get_mlflow_extension`
    """

    def __init__(self, client, project_key):
        self.client = client
        self.project = client.get_project(project_key)
        self.project_key = project_key

    def list_models(self, run_id):
        """
        Returns the list of models of given run

        :param run_id: run_id for which to return a list of models
        :type run_id: str
        """
        response = self.client._perform_http(
            "GET", "/api/2.0/mlflow/extension/models/{}".format(run_id),
            headers={"x-dku-mlflow-project-key": self.project_key}
        )
        return response.json()

    def list_experiments(self, view_type="ACTIVE_ONLY", max_results=1000):
        """
        Returns the list of experiments in the DSS project for which MLflow integration
        is setup

        :param view_type: ACTIVE_ONLY, DELETED_ONLY or ALL
        :type view_type: str
        :param max_results: max results count
        :type max_results: int
        :rtype: dict
        """
        response = self.client._perform_http(
            "GET", "/api/2.0/mlflow/experiments/list?view_type={view_type}&max_results={max_results}".format(view_type=view_type, max_results=max_results),
            headers={"x-dku-mlflow-project-key": self.project_key}
        )
        return response.json()

    def rename_experiment(self, experiment_id, new_name):
        """
        Renames an experiment

        :param experiment_id: experiment id
        :type experiment_id: str
        :param new_name: new name
        :type new_name: str
        """
        response = self.client._perform_http(
            "POST", "/api/2.0/mlflow/experiments/update",
            headers={"x-dku-mlflow-project-key": self.project_key},
            body={"experiment_id": experiment_id, "new_name": new_name}
        )
        return response.json()

    def restore_experiment(self, experiment_id):
        """
        Restores a deleted experiment

        :param experiment_id: experiment id
        :type experiment_id: str
        """
        response = self.client._perform_http(
            "POST", "/api/2.0/mlflow/experiments/restore",
            headers={"x-dku-mlflow-project-key": self.project_key},
            body={"experiment_id": experiment_id}
        )
        return response.json()

    def restore_run(self, run_id):
        """
        Restores a deleted run

        :param run_id: run id
        :type run_id: str
        """
        response = self.client._perform_http(
            "POST", "/api/2.0/mlflow/runs/restore",
            headers={"x-dku-mlflow-project-key": self.project_key},
            body={"run_id": run_id}
        )
        return response.json()

    def garbage_collect(self):
        """
        Permanently deletes the experiments and runs marked as "Deleted"
        """
        # NOTE(review): the backend exposes this destructive operation as a GET
        # endpoint; kept as-is to match the server-side API contract.
        self.client._perform_http(
            "GET", "/api/2.0/mlflow/extension/garbage-collect",
            headers={"x-dku-mlflow-project-key": self.project_key}
        )

    def create_experiment_tracking_dataset(self, dataset_name, experiment_ids=None, view_type="ACTIVE_ONLY", filter_expr="", order_by=None, format="LONG"):
        """
        Creates a virtual dataset exposing experiment tracking data.

        :param dataset_name: name of the dataset
        :type dataset_name: str
        :param experiment_ids: list of ids of experiments to filter on. No filtering if empty
        :type experiment_ids: list(str)
        :param view_type: one of ACTIVE_ONLY, DELETED_ONLY and ALL. Default is ACTIVE_ONLY
        :type view_type: str
        :param filter_expr: MLflow search expression
        :type filter_expr: str
        :param order_by: list of order by clauses. Default is ordered by start_time, then runId
        :type order_by: list(str)
        :param format: LONG or JSON. Default is LONG
        :type format: str
        """
        # Use None sentinels instead of mutable default arguments ([] shared
        # across calls); normalize to fresh lists here so behavior is unchanged.
        if experiment_ids is None:
            experiment_ids = []
        if order_by is None:
            order_by = []
        self.client._perform_http(
            "POST", "/api/2.0/mlflow/extension/create-project-experiments-dataset",
            headers={"x-dku-mlflow-project-key": self.project_key},
            body={
                "datasetName": dataset_name,
                "experimentIds": experiment_ids,
                "viewType": view_type,
                "filter": filter_expr,
                "orderBy": order_by,
                "format": format
            }
        )

    def clean_experiment_tracking_db(self):
        """
        Cleans the experiments, runs, params, metrics, tags, etc. for this project

        This call requires an API key with admin rights
        """
        self.client._perform_raw("DELETE", "/api/2.0/mlflow/extension/clean-db/%s" % self.project_key)

    def set_run_inference_info(self, run_id, model_type, classes=None, code_env_name=None, target=None):
        """
        Sets the type of the model, and optionally other information useful to deploy or evaluate it.

        model_type must be one of:
        - REGRESSION
        - BINARY_CLASSIFICATION
        - MULTICLASS
        - OTHER

        Classes must be specified if and only if the model is a BINARY_CLASSIFICATION or MULTICLASS model.

        This information is leveraged to filter saved models on their prediction type and prefill the classes
        when deploying using the GUI an MLflow model as a version of a DSS Saved Model.

        :param model_type: prediction type (see doc)
        :type model_type: str
        :param run_id: run_id for which to set the classes
        :type run_id: str
        :param classes: ordered list of classes (not for all prediction types, see doc)
        :type classes: list(str)
        :param code_env_name: name of an adequate DSS python code environment
        :type code_env_name: str
        :param target: name of the target
        :type target: str
        :raises ValueError: if model_type is unknown, classes are inconsistent with
            model_type, or code_env_name / target are not strings
        """
        if model_type not in {"REGRESSION", "BINARY_CLASSIFICATION", "MULTICLASS", "OTHER"}:
            raise ValueError('Invalid prediction type: {}'.format(model_type))

        if classes and model_type not in {"BINARY_CLASSIFICATION", "MULTICLASS"}:
            raise ValueError('Classes can be specified only for BINARY_CLASSIFICATION or MULTICLASS prediction types')
        if model_type in {"BINARY_CLASSIFICATION", "MULTICLASS"}:
            if not classes:
                raise ValueError('Classes must be specified for {} prediction type'.format(model_type))
            if not isinstance(classes, list):
                raise ValueError('Wrong type for classes: {}'.format(type(classes)))
            for cur_class in classes:
                if cur_class is None:
                    raise ValueError('class can not be None')
                if not isinstance(cur_class, str):
                    raise ValueError('Wrong type for class {}: {}'.format(cur_class, type(cur_class)))

        if code_env_name and not isinstance(code_env_name, str):
            raise ValueError('code_env_name must be a string')
        if target and not isinstance(target, str):
            raise ValueError('target must be a string')

        params = {
            "run_id": run_id,
            "prediction_type": model_type
        }

        # Classes are serialized to a JSON string, as expected by the backend.
        if classes:
            params["classes"] = json.dumps(classes)
        if code_env_name:
            params["code_env_name"] = code_env_name
        if target:
            params["target"] = target

        self.client._perform_http(
            "POST", "/api/2.0/mlflow/extension/set-run-inference-info",
            headers={"x-dku-mlflow-project-key": self.project_key},
            body=params
        )

dataikuapi/dss/project.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
import time, warnings, sys, os.path as osp
1+
import warnings, os.path as osp
2+
3+
from ..dss_plugin_mlflow import MLflowHandle
4+
25
from .dataset import DSSDataset, DSSDatasetListItem, DSSManagedDatasetCreationHelper
36
from .modelcomparison import DSSModelComparison
47
from .jupyternotebook import DSSJupyterNotebook, DSSJupyterNotebookListItem
@@ -9,6 +12,7 @@
912
from .managedfolder import DSSManagedFolder
1013
from .savedmodel import DSSSavedModel
1114
from .modelevaluationstore import DSSModelEvaluationStore
15+
from .mlflow import DSSMLflowExtension
1216
from .job import DSSJob, DSSJobWaiter
1317
from .scenario import DSSScenario, DSSScenarioListItem
1418
from .continuousactivity import DSSContinuousActivity
@@ -30,8 +34,8 @@ class DSSProject(object):
3034
Do not create this class directly, instead use :meth:`dataikuapi.DSSClient.get_project`
3135
"""
3236
def __init__(self, client, project_key):
33-
self.client = client
34-
self.project_key = project_key
37+
self.client = client
38+
self.project_key = project_key
3539

3640
def get_summary(self):
3741
"""
@@ -1589,7 +1593,7 @@ def list_hive_tables(self, hive_database):
15891593
"""
15901594
connection_name = "@virtual(hive-jdbc):" + hive_database
15911595
ret = self.client._perform_json("GET", "/projects/%s/datasets/tables-import/actions/list-tables" % (self.project_key),
1592-
params = {"connectionName": connection_name} )
1596+
params={"connectionName": connection_name} )
15931597

15941598
def to_schema_table_pair(x):
15951599
return {"schema":x.get("databaseName", None), "table":x["table"]}
@@ -1598,11 +1602,29 @@ def to_schema_table_pair(x):
15981602
########################################################
15991603
# App designer
16001604
########################################################
1601-
16021605
def get_app_manifest(self):
16031606
raw_data = self.client._perform_json("GET", "/projects/%s/app-manifest" % self.project_key)
16041607
return DSSAppManifest(self.client, raw_data, self.project_key)
16051608

1609+
# MLflow experiment tracking
1610+
########################################################
1611+
def setup_mlflow(self, managed_folder, host=None):
    """
    Set up the dss-plugin for MLflow.

    :param object managed_folder: a :class:`dataikuapi.dss.DSSManagedFolder` where MLflow artifacts should be stored.
    :param str host: setup a custom host if the backend used is not DSS
    :returns: an MLflow integration handle bound to this project
    """
    handle = MLflowHandle(
        client=self.client,
        project=self,
        managed_folder=managed_folder,
        host=host,
    )
    return handle
1619+
1620+
def get_mlflow_extension(self):
    """
    Get a handle to interact with the extension of MLflow provided by DSS.

    :returns: A :class:`dataikuapi.dss.mlflow.DSSMLflowExtension` MLflow extension handle
    """
    extension = DSSMLflowExtension(client=self.client, project_key=self.project_key)
    return extension
16061628

16071629
class TablesImportDefinition(object):
16081630
"""

dataikuapi/dss/savedmodel.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def get_origin_ml_task(self):
122122
if fmi is not None:
123123
return DSSMLTask.from_full_model_id(self.client, fmi, project_key=self.project_key)
124124

125-
def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHERIT"):
125+
def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHERIT", container_exec_config_name="INHERIT"):
126126
"""
127127
Create a new version for this saved model from a path containing a MLFlow model.
128128
@@ -133,6 +133,10 @@ def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHER
133133
:param str code_env_name: Name of the code env to use for this model version. The code env must contain at least
134134
mlflow and the package(s) corresponding to the used MLFlow-compatible frameworks.
135135
If value is "INHERIT", the default active code env of the project will be used
136+
:param str container_exec_config_name: Name of the containerized execution configuration to use while creating
137+
this model version.
138+
If value is "INHERIT", the container execution configuration of the project will be used.
139+
If value is "NONE", local execution will be used (no container)
136140
:return a :class:MLFlowVersionHandler in order to interact with the new MLFlow model version
137141
"""
138142
# TODO: Add a check that it's indeed a MLFlow model folder
@@ -144,13 +148,13 @@ def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHER
144148
archive_filename = _make_zipfile(os.path.join(archive_temp_dir, "tmpmodel.zip"), path)
145149

146150
with open(archive_filename, "rb") as fp:
147-
self.client._perform_empty("POST", "/projects/%s/savedmodels/%s/versions/%s?codeEnvName=%s" % (self.project_key, self.sm_id, version_id, code_env_name),
151+
self.client._perform_empty("POST", "/projects/%s/savedmodels/%s/versions/%s?codeEnvName=%s&containerExecConfigName=%s" % (self.project_key, self.sm_id, version_id, code_env_name, container_exec_config_name),
148152
files={"file": (archive_filename, fp)})
149153
return self.get_mlflow_version_handler(version_id)
150154
finally:
151155
shutil.rmtree(archive_temp_dir)
152156

153-
def import_mlflow_version_from_managed_folder(self, version_id, managed_folder, path, code_env_name="INHERIT"):
157+
def import_mlflow_version_from_managed_folder(self, version_id, managed_folder, path, code_env_name="INHERIT", container_exec_config_name="INHERIT"):
154158
"""
155159
Create a new version for this saved model from a path containing a MLFlow model in a managed folder.
156160
@@ -162,6 +166,10 @@ def import_mlflow_version_from_managed_folder(self, version_id, managed_folder,
162166
:param str code_env_name: Name of the code env to use for this model version. The code env must contain at least
163167
mlflow and the package(s) corresponding to the used MLFlow-compatible frameworks.
164168
If value is "INHERIT", the default active code env of the project will be used
169+
:param str container_exec_config_name: Name of the containerized execution configuration to use for evaluating
170+
this model version.
171+
If value is "INHERIT", the container execution configuration of the project will be used.
172+
If value is "NONE", local execution will be used (no container)
165173
:return a :class:MLFlowVersionHandler in order to interact with the new MLFlow model version
166174
"""
167175
# TODO: Add a check that it's indeed a MLFlow model folder
@@ -172,8 +180,8 @@ def import_mlflow_version_from_managed_folder(self, version_id, managed_folder,
172180
folder_ref = managed_folder
173181

174182
self.client._perform_empty(
175-
"POST", "/projects/{project_id}/savedmodels/{saved_model_id}/versions/{version_id}?codeEnvName={codeEnvName}".format(
176-
project_id=self.project_key, saved_model_id=self.sm_id, version_id=version_id, codeEnvName=code_env_name
183+
"POST", "/projects/{project_id}/savedmodels/{saved_model_id}/versions/{version_id}?codeEnvName={codeEnvName}&containerExecConfigName={containerExecConfigName}".format(
184+
project_id=self.project_key, saved_model_id=self.sm_id, version_id=version_id, codeEnvName=code_env_name, containerExecConfigName=container_exec_config_name
177185
),
178186
params={"folderRef": folder_ref, "path": path},
179187
files={"file": (None, None)} # required for backend-mandated multipart request
@@ -343,21 +351,24 @@ def set_core_metadata(self,
343351
"/projects/%s/savedmodels/%s/versions/%s/external-ml/metadata" % (self.saved_model.project_key, self.saved_model.sm_id, self.version_id),
344352
body=metadata)
345353

346-
def evaluate(self, dataset_ref):
354+
def evaluate(self, dataset_ref, container_exec_config_name="INHERIT"):
347355
"""
348356
Evaluates the performance of this model version on a particular dataset.
349357
After calling this, the "result screens" of the MLFlow model version will be available
350358
(confusion matrix, error distribution, performance metrics, ...)
351359
and more information will be available when calling :meth:`DSSSavedModel.get_version_details`
352360
353361
:meth:`set_core_metadata` must be called before you can evaluate a dataset
354-
355362
:param str dataset_ref: Evaluation dataset to use (either a dataset name, "PROJECT.datasetName", :class:`DSSDataset` instance or :class:`dataiku.Dataset` instance)
363+
:param str container_exec_config_name: Name of the containerized execution configuration to use for running the evaluation process.
364+
If value is "INHERIT", the container execution configuration of the project will be used.
365+
If value is "NONE", local execution will be used (no container)
356366
"""
357367
if hasattr(dataset_ref, 'name'):
358368
dataset_ref = dataset_ref.name
359369
req = {
360-
"datasetRef" : dataset_ref
370+
"datasetRef": dataset_ref,
371+
"containerExecConfigName": container_exec_config_name
361372
}
362373
self.saved_model.client._perform_empty("POST",
363374
"/projects/%s/savedmodels/%s/versions/%s/external-ml/actions/evaluate" % (self.saved_model.project_key, self.saved_model.sm_id, self.version_id),
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .utils import MLflowHandle

0 commit comments

Comments
 (0)