
Commit 354efea

Merge remote-tracking branch 'origin/master' into perso/aqueru/govern-bundle

2 parents: 9bb43da + c84bec6

File tree: 12 files changed, +794 −41 lines


dataikuapi/dss/dataset.py (19 additions, 0 deletions)

@@ -737,6 +737,25 @@ def add_time_partitioning_dimension(self, dim_name, period="DAY"):
     def add_raw_schema_column(self, column):
         self.settings["schema"]["columns"].append(column)

+    @property
+    def is_feature_group(self):
+        """
+        Indicates whether the dataset is defined as a feature group, available in the Feature Store.
+
+        :rtype: bool
+        """
+        return self.settings["featureGroup"]
+
+    def set_feature_group(self, status):
+        """
+        (Un)sets the dataset as a feature group, available in the Feature Store.
+        Changes to this property are applied when calling :meth:`save` and require the "Manage Feature Store" permission.
+
+        :param status: whether the dataset should be defined as a feature group
+        :type status: bool
+        """
+        self.settings["featureGroup"] = status
+
     def save(self):
         self.dataset.client._perform_empty(
             "PUT", "/projects/%s/datasets/%s" % (self.dataset.project_key, self.dataset.dataset_name),

dataikuapi/dss/feature_store.py (40 additions, 0 deletions, new file)

@@ -0,0 +1,40 @@
+from dataikuapi.dss.dataset import DSSDataset
+
+
+class DSSFeatureGroupListItem(object):
+    def __init__(self, client, project_key, name):
+        self.client = client
+        self.project_key = project_key
+        self.name = name
+
+    @property
+    def id(self):
+        """The id of the feature group, in the form "<project key>.<dataset name>"."""
+        return self.project_key + "." + self.name
+
+    def get_as_dataset(self):
+        """
+        Gets the feature group as a dataset.
+
+        :return: a handle on the dataset
+        :rtype: :class:`dataikuapi.dss.dataset.DSSDataset`
+        """
+        return DSSDataset(self.client, self.project_key, self.name)
+
+
+class DSSFeatureStore(object):
+    def __init__(self, client):
+        """
+        A handle on the Feature Store.
+        Do not create this class directly, use :meth:`dataikuapi.DSSClient.get_feature_store`
+        """
+        self.client = client
+
+    def list_feature_groups(self):
+        """
+        Gets the list of feature groups on which the user has at least read permission.
+
+        :return: the list of feature groups
+        :rtype: list of :class:`dataikuapi.dss.feature_store.DSSFeatureGroupListItem`
+        """
+        items = self.client._perform_json("GET", "/feature-store/feature-groups")
+        return [DSSFeatureGroupListItem(self.client, item["projectKey"], item["name"]) for item in items]
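A short sketch of the intended call pattern, assuming `DSSClient.get_feature_store` returns the `DSSFeatureStore` handle, as its constructor docstring indicates (host, API key, and names are placeholders):

import dataikuapi

client = dataikuapi.DSSClient("https://dss.example.com", "my-api-key")  # placeholders

# List the feature groups this user can read, then open one as a dataset
fs = client.get_feature_store()
for fg in fs.list_feature_groups():
    print(fg.id)                   # "<project key>.<dataset name>"
    dataset = fg.get_as_dataset()  # a regular DSSDataset handle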

dataikuapi/dss/mlflow.py (200 additions, 0 deletions, new file)

@@ -0,0 +1,200 @@
+import json
+
+
+class DSSMLflowExtension(object):
+    """
+    A handle to interact with specific endpoints of the DSS MLflow integration.
+
+    Do not create this directly, use :meth:`dataikuapi.dss.project.DSSProject.get_mlflow_extension`
+    """
+
+    def __init__(self, client, project_key):
+        self.client = client
+        self.project = client.get_project(project_key)
+        self.project_key = project_key
+
+    def list_models(self, run_id):
+        """
+        Returns the list of models of the given run.
+
+        :param run_id: run id for which to return the list of models
+        :type run_id: str
+        """
+        response = self.client._perform_http(
+            "GET", "/api/2.0/mlflow/extension/models/{}".format(run_id),
+            headers={"x-dku-mlflow-project-key": self.project_key}
+        )
+        return response.json()
+
+    def list_experiments(self, view_type="ACTIVE_ONLY", max_results=1000):
+        """
+        Returns the list of experiments in the DSS project for which MLflow integration
+        is set up.
+
+        :param view_type: ACTIVE_ONLY, DELETED_ONLY or ALL
+        :type view_type: str
+        :param max_results: maximum number of results
+        :type max_results: int
+        :rtype: dict
+        """
+        response = self.client._perform_http(
+            "GET", "/api/2.0/mlflow/experiments/list?view_type={view_type}&max_results={max_results}".format(view_type=view_type, max_results=max_results),
+            headers={"x-dku-mlflow-project-key": self.project_key}
+        )
+        return response.json()
+
+    def rename_experiment(self, experiment_id, new_name):
+        """
+        Renames an experiment.
+
+        :param experiment_id: experiment id
+        :type experiment_id: str
+        :param new_name: new name
+        :type new_name: str
+        """
+        response = self.client._perform_http(
+            "POST", "/api/2.0/mlflow/experiments/update",
+            headers={"x-dku-mlflow-project-key": self.project_key},
+            body={"experiment_id": experiment_id, "new_name": new_name}
+        )
+        return response.json()
+
+    def restore_experiment(self, experiment_id):
+        """
+        Restores a deleted experiment.
+
+        :param experiment_id: experiment id
+        :type experiment_id: str
+        """
+        response = self.client._perform_http(
+            "POST", "/api/2.0/mlflow/experiments/restore",
+            headers={"x-dku-mlflow-project-key": self.project_key},
+            body={"experiment_id": experiment_id}
+        )
+        return response.json()
+
+    def restore_run(self, run_id):
+        """
+        Restores a deleted run.
+
+        :param run_id: run id
+        :type run_id: str
+        """
+        response = self.client._perform_http(
+            "POST", "/api/2.0/mlflow/runs/restore",
+            headers={"x-dku-mlflow-project-key": self.project_key},
+            body={"run_id": run_id}
+        )
+        return response.json()
+
+    def garbage_collect(self):
+        """
+        Permanently deletes the experiments and runs marked as "Deleted".
+        """
+        self.client._perform_http(
+            "GET", "/api/2.0/mlflow/extension/garbage-collect",
+            headers={"x-dku-mlflow-project-key": self.project_key}
+        )
+
+    def create_experiment_tracking_dataset(self, dataset_name, experiment_ids=[], view_type="ACTIVE_ONLY", filter_expr="", order_by=[], format="LONG"):
+        """
+        Creates a virtual dataset exposing experiment tracking data.
+
+        :param dataset_name: name of the dataset
+        :type dataset_name: str
+        :param experiment_ids: list of ids of experiments to filter on; no filtering if empty
+        :type experiment_ids: list(str)
+        :param view_type: one of ACTIVE_ONLY, DELETED_ONLY and ALL; default is ACTIVE_ONLY
+        :type view_type: str
+        :param filter_expr: MLflow search expression
+        :type filter_expr: str
+        :param order_by: list of order-by clauses; default is ordered by start_time, then runId
+        :type order_by: list(str)
+        :param format: LONG or JSON; default is LONG
+        :type format: str
+        """
+        self.client._perform_http(
+            "POST", "/api/2.0/mlflow/extension/create-project-experiments-dataset",
+            headers={"x-dku-mlflow-project-key": self.project_key},
+            body={
+                "datasetName": dataset_name,
+                "experimentIds": experiment_ids,
+                "viewType": view_type,
+                "filter": filter_expr,
+                "orderBy": order_by,
+                "format": format
+            }
+        )
+
+    def clean_experiment_tracking_db(self):
+        """
+        Cleans the experiments, runs, params, metrics, tags, etc. for this project.
+
+        This call requires an API key with admin rights.
+        """
+        self.client._perform_raw("DELETE", "/api/2.0/mlflow/extension/clean-db/%s" % self.project_key)
+
+    def set_run_inference_info(self, run_id, model_type, classes=None, code_env_name=None, target=None):
+        """
+        Sets the type of the model, and optionally other information useful to deploy or evaluate it.
+
+        model_type must be one of:
+        - REGRESSION
+        - BINARY_CLASSIFICATION
+        - MULTICLASS
+        - OTHER
+
+        Classes must be specified if and only if the model is a BINARY_CLASSIFICATION or MULTICLASS model.
+
+        This information is used to filter saved models on their prediction type, and to prefill the classes
+        when deploying an MLflow model as a version of a DSS Saved Model through the GUI.
+
+        :param run_id: run id for which to set the inference information
+        :type run_id: str
+        :param model_type: prediction type (see above)
+        :type model_type: str
+        :param classes: ordered list of classes (only for BINARY_CLASSIFICATION and MULTICLASS prediction types)
+        :type classes: list(str)
+        :param code_env_name: name of an adequate DSS Python code environment
+        :type code_env_name: str
+        :param target: name of the target
+        :type target: str
+        """
+        if model_type not in {"REGRESSION", "BINARY_CLASSIFICATION", "MULTICLASS", "OTHER"}:
+            raise ValueError('Invalid prediction type: {}'.format(model_type))
+
+        if classes and model_type not in {"BINARY_CLASSIFICATION", "MULTICLASS"}:
+            raise ValueError('Classes can be specified only for BINARY_CLASSIFICATION or MULTICLASS prediction types')
+        if model_type in {"BINARY_CLASSIFICATION", "MULTICLASS"}:
+            if not classes:
+                raise ValueError('Classes must be specified for {} prediction type'.format(model_type))
+            if not isinstance(classes, list):
+                raise ValueError('Wrong type for classes: {}'.format(type(classes)))
+            for cur_class in classes:
+                if cur_class is None:
+                    raise ValueError('class can not be None')
+                if not isinstance(cur_class, str):
+                    raise ValueError('Wrong type for class {}: {}'.format(cur_class, type(cur_class)))
+
+        if code_env_name and not isinstance(code_env_name, str):
+            raise ValueError('code_env_name must be a string')
+        if target and not isinstance(target, str):
+            raise ValueError('target must be a string')
+
+        params = {
+            "run_id": run_id,
+            "prediction_type": model_type
+        }
+        if classes:
+            params["classes"] = json.dumps(classes)
+        if code_env_name:
+            params["code_env_name"] = code_env_name
+        if target:
+            params["target"] = target
+
+        self.client._perform_http(
+            "POST", "/api/2.0/mlflow/extension/set-run-inference-info",
+            headers={"x-dku-mlflow-project-key": self.project_key},
+            body=params
+        )
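A hedged sketch of how these endpoints combine, using only the methods above; the project key, run id, and class labels are hypothetical:

import dataikuapi

client = dataikuapi.DSSClient("https://dss.example.com", "my-api-key")  # placeholders
ext = client.get_project("MYPROJECT").get_mlflow_extension()

# Browse tracked experiments and the models logged under one run
experiments = ext.list_experiments(view_type="ALL", max_results=100)
models = ext.list_models(run_id="0123456789abcdef")  # hypothetical run id

# Declare the run's model a binary classifier so DSS can prefill the
# classes when it is deployed as a Saved Model version
ext.set_run_inference_info(
    run_id="0123456789abcdef",
    model_type="BINARY_CLASSIFICATION",
    classes=["negative", "positive"],
)

# Expose the project's experiment tracking data as a virtual dataset
ext.create_experiment_tracking_dataset("experiments_long")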

dataikuapi/dss/project.py (27 additions, 5 deletions)

@@ -1,4 +1,7 @@
-import time, warnings, sys, os.path as osp
+import warnings, os.path as osp
+
+from ..dss_plugin_mlflow import MLflowHandle
+
 from .dataset import DSSDataset, DSSDatasetListItem, DSSManagedDatasetCreationHelper
 from .modelcomparison import DSSModelComparison
 from .jupyternotebook import DSSJupyterNotebook, DSSJupyterNotebookListItem

@@ -9,6 +12,7 @@
 from .managedfolder import DSSManagedFolder
 from .savedmodel import DSSSavedModel
 from .modelevaluationstore import DSSModelEvaluationStore
+from .mlflow import DSSMLflowExtension
 from .job import DSSJob, DSSJobWaiter
 from .scenario import DSSScenario, DSSScenarioListItem
 from .continuousactivity import DSSContinuousActivity

@@ -30,8 +34,8 @@ class DSSProject(object):
     Do not create this class directly, instead use :meth:`dataikuapi.DSSClient.get_project`
     """
     def __init__(self, client, project_key):
-        self.client = client
-        self.project_key = project_key
+        self.client = client
+        self.project_key = project_key
(whitespace-only change)

@@ -1589,7 +1593,7 @@ def list_hive_tables(self, hive_database):
         """
         connection_name = "@virtual(hive-jdbc):" + hive_database
         ret = self.client._perform_json("GET", "/projects/%s/datasets/tables-import/actions/list-tables" % (self.project_key),
-                params = {"connectionName": connection_name} )
+                params={"connectionName": connection_name} )

         def to_schema_table_pair(x):
             return {"schema":x.get("databaseName", None), "table":x["table"]}

@@ -1598,11 +1602,29 @@ def to_schema_table_pair(x):
     ########################################################
     # App designer
     ########################################################
-
     def get_app_manifest(self):
         raw_data = self.client._perform_json("GET", "/projects/%s/app-manifest" % self.project_key)
         return DSSAppManifest(self.client, raw_data, self.project_key)

+    # MLflow experiment tracking
+    ########################################################
+
+    def setup_mlflow(self, managed_folder, host=None):
+        """
+        Sets up the dss-plugin for MLflow.
+
+        :param object managed_folder: the :class:`dataikuapi.dss.managedfolder.DSSManagedFolder` where MLflow artifacts should be stored
+        :param str host: set a custom host if the backend used is not DSS
+        """
+        return MLflowHandle(client=self.client, project=self, managed_folder=managed_folder, host=host)
+
+    def get_mlflow_extension(self):
+        """
+        Gets a handle to interact with the MLflow extension provided by DSS.
+
+        :returns: an MLflow extension handle
+        :rtype: :class:`dataikuapi.dss.mlflow.DSSMLflowExtension`
+        """
+        return DSSMLflowExtension(client=self.client, project_key=self.project_key)

 class TablesImportDefinition(object):
     """

dataikuapi/dss/recipe.py (29 additions, 0 deletions)

@@ -3,6 +3,10 @@
 from .discussion import DSSObjectDiscussions
 import json, logging, warnings
 from .utils import DSSTaggableObjectListItem, DSSTaggableObjectSettings
+try:
+    basestring
+except NameError:
+    basestring = str

 class DSSRecipeListItem(DSSTaggableObjectListItem):
     """An item in a list of recipes. Do not instantiate this class, use :meth:`dataikuapi.dss.project.DSSProject.list_recipes`"""

@@ -34,6 +38,11 @@ def __init__(self, client, project_key, recipe_name):
         self.project_key = project_key
         self.recipe_name = recipe_name

+    @property
+    def id(self):
+        """The id of the recipe"""
+        return self.recipe_name
+
     @property
     def name(self):
         """The name of the recipe"""

@@ -225,6 +234,16 @@ def get_continuous_activity(self):
         from .continuousactivity import DSSContinuousActivity
         return DSSContinuousActivity(self.client, self.project_key, self.recipe_name)

+    def move_to_zone(self, zone):
+        """
+        Moves this object to a flow zone.
+
+        :param object zone: the :class:`dataikuapi.dss.flow.DSSFlowZone` to move the object to, or its identifier
+        """
+        if isinstance(zone, basestring):
+            zone = self.client.get_project(self.project_key).get_flow().get_zone(zone)
+        zone.add_item(self)
+
 class DSSRecipeStatus(object):
     """Status of a recipe.
     Do not create that directly, use :meth:`DSSRecipe.get_status`"""

@@ -1411,6 +1430,12 @@ class StandaloneEvaluationRecipeCreator(DSSRecipeCreator):
         builder.with_input("scored_dataset_to_evaluate")
         builder.with_output_evaluation_store(evaluation_store_id)

+        # Add a reference dataset (optional) to compute data drift
+        builder.with_reference_dataset("reference_dataset")
+
+        # Finish creation of the recipe
         new_recipe = builder.create()

         # Modify the model parameters in the SER settings

@@ -1465,6 +1490,10 @@ def with_output_evaluation_store(self, mes_id):
         """Sets the output model evaluation store"""
         return self._with_output(mes_id, role="main")

+    def with_reference_dataset(self, dataset_name):
+        """Sets the dataset to use as a reference in data drift computation (optional)."""
+        return self._with_input(dataset_name, self.project.project_key, role="reference")
+

 class ClusteringScoringRecipeCreator(SingleOutputRecipeCreator):
     """
