
Commit 3e0eb00

Merge pull request #205 from dataiku/feature/mes-mc-10.0.3
MES / MC updates
2 parents: 7ddb29c + 775b9d8

File tree: 3 files changed, +126 −3 lines changed


dataikuapi/dss/modelcomparison.py

Lines changed: 31 additions & 0 deletions
@@ -1,4 +1,5 @@
 from dataikuapi.dss.discussion import DSSObjectDiscussions
+import re
 
 
 class DSSModelComparison(object):
@@ -36,6 +37,36 @@ def get_object_discussions(self):
         """
         return DSSObjectDiscussions(self.client, self.project_key, "MODEL_COMPARISON", self.mec_id)
 
+    def get_evaluation_like_from_full_id(self, full_id):
+        """
+        Retrieves a Saved Model from the flow, a Lab Model from an Analysis, or a Model Evaluation from a Model Evaluation Store, using its full id.
+
+        :param string full_id: the full id of the item to retrieve
+
+        :returns: A handle on the Saved Model, the Model Evaluation or the Lab Model
+        :rtype: :class:`dataikuapi.dss.savedmodel.DSSSavedModel`
+        :rtype: :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluation`
+        :rtype: :class:`dataikuapi.dss.ml.DSSTrainedPredictionModelDetails`
+        """
+
+        saved_model_pattern = re.compile("^S-(\\w+)-(\\w+)-(\\w+)(?:-part-(\\w+)-(v?\\d+))?$\\Z")
+        analysis_model_pattern = re.compile("^A-(\\w+)-(\\w+)-(\\w+)-(s[0-9]+)-(pp[0-9]+(?:-part-(\\w+)|-base)?)-(m[0-9]+)$\\Z")
+        model_evaluation_pattern = re.compile("^ME-(\\w+)-(\\w+)-(\\w+)$\\Z")
+
+        if saved_model_pattern.match(full_id):
+            return self.project.get_saved_model(full_id)
+        elif model_evaluation_pattern.match(full_id):
+            mes_id = full_id.split('-')[2]
+            evaluation_id = full_id.split('-')[3]
+            mes = self.project.get_model_evaluation_store(mes_id)
+            return mes.get_model_evaluation(evaluation_id)
+        elif analysis_model_pattern.match(full_id):
+            analysis_id = full_id.split('-')[2]
+            task_id = full_id.split('-')[3]
+            return self.project.get_ml_task(analysis_id, task_id).get_trained_model_details(full_id)
+
+        raise ValueError("{} is not a valid full model id or full model evaluation id.".format(full_id))
+
     ########################################################
     # Deletion
     ########################################################
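For illustration, a minimal usage sketch of the new helper. The comparison id and full ids below are hypothetical placeholders that follow the S-/ME-/A- patterns matched above, and `project.get_model_comparison` is assumed to be the usual handle getter:

    # Hypothetical ids; each branch returns the handle type documented in the docstring
    comparison = project.get_model_comparison("my_comparison")  # assumed getter

    # Saved model full id -> DSSSavedModel
    sm = comparison.get_evaluation_like_from_full_id("S-MYPROJECT-aBcDeF12-XyZ98765")

    # Model evaluation full id -> DSSModelEvaluation
    # (store id and evaluation id are parsed out of the id)
    ev = comparison.get_evaluation_like_from_full_id("ME-MYPROJECT-sToRe123-EvAl4567")

    # Lab (analysis) model full id -> DSSTrainedPredictionModelDetails
    lab = comparison.get_evaluation_like_from_full_id("A-MYPROJECT-AnAlYsIs-MlTaSk12-s1-pp1-m1")

    # Any other format raises ValueError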

dataikuapi/dss/project.py

Lines changed: 2 additions & 0 deletions
@@ -1404,6 +1404,8 @@ def new_recipe(self, type, name=None):
             return recipe.PredictionScoringRecipeCreator(name, self)
         elif type == "evaluation":
             return recipe.EvaluationRecipeCreator(name, self)
+        elif type == "standalone_evaluation":
+            return recipe.StandaloneEvaluationRecipeCreator(name, self)
         elif type == "clustering_scoring":
             return recipe.ClusteringScoringRecipeCreator(name, self)
         elif type == "download":
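With this dispatch in place, requesting the new type from `new_recipe` hands back the `StandaloneEvaluationRecipeCreator` added below in recipe.py; a two-line sketch with a placeholder project key:

    project = client.get_project("MYPROJECT")  # placeholder project key
    builder = project.new_recipe("standalone_evaluation")  # returns a StandaloneEvaluationRecipeCreator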

dataikuapi/dss/recipe.py

Lines changed: 93 additions & 3 deletions
@@ -1328,10 +1328,10 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
 
     .. code-block:: python
 
-        # Create a new prediction scoring recipe outputing to a new dataset
+        # Create a new evaluation recipe outputting to a new dataset, to a metrics dataset and/or to a model evaluation store
 
         project = client.get_project("MYPROJECT")
-        builder = EvaluationRecipeCreator("my_scoring_recipe", project)
+        builder = project.new_recipe("evaluation")
         builder.with_input_model(saved_model_id)
         builder.with_input("dataset_to_evaluate")
@@ -1340,7 +1340,28 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
         builder.with_output_evaluation_store(evaluation_store_id)
 
         new_recipe = builder.build()
-
+
+        # Access the settings
+
+        er_settings = new_recipe.get_settings()
+        payload = er_settings.obj_payload
+
+        # Change the settings
+
+        payload['dontComputePerformance'] = True
+        payload['outputProbabilities'] = False
+        payload['metrics'] = ["precision", "recall", "auc", "f1", "costMatrixGain"]
+
+        # Manage evaluation labels
+
+        payload['labels'] = [dict(key="label_1", value="value_1"), dict(key="label_2", value="value_2")]
+
+        # Save the settings and run the recipe
+
+        er_settings.save()
+        new_recipe.run()
+
     Outputs must exist. They can be created using the following:
 
     .. code-block:: python
@@ -1376,6 +1397,75 @@ def with_output_evaluation_store(self, mes_id):
         return self._with_output(mes_id, role="evaluationStore")
 
 
+class StandaloneEvaluationRecipeCreator(DSSRecipeCreator):
+    """
+    Builder for the creation of a new "Standalone Evaluate" recipe, from an
+    input dataset
+
+    .. code-block:: python
+
+        # Create a new standalone evaluation of a scored dataset
+
+        project = client.get_project("MYPROJECT")
+        builder = project.new_recipe("standalone_evaluation")
+        builder.with_input("scored_dataset_to_evaluate")
+        builder.with_output_evaluation_store(evaluation_store_id)
+
+        new_recipe = builder.create()
+
+        # Modify the model parameters in the SER settings
+
+        ser_settings = new_recipe.get_settings()
+        payload = ser_settings.obj_payload
+
+        payload['predictionType'] = "BINARY_CLASSIFICATION"
+        payload['targetVariable'] = "Survived"
+        payload['predictionVariable'] = "prediction"
+        payload['isProbaAware'] = True
+        payload['dontComputePerformance'] = False
+
+        # For a classification model with probabilities, the 'probas' section can be filled with the mapping of the classes and their probability columns,
+        # e.g. for a binary classification model with 2 columns: proba_0 and proba_1
+
+        class_0 = dict(key=0, value="proba_0")
+        class_1 = dict(key=1, value="proba_1")
+        payload['probas'] = [class_0, class_1]
+
+        # Change the 'features' settings for this standalone evaluation,
+        # e.g. reject the features that you do not want to use in the evaluation
+
+        feature_passengerid = dict(name="Passenger_Id", role="REJECT", type="TEXT")
+        feature_ticket = dict(name="Ticket", role="REJECT", type="TEXT")
+        feature_cabin = dict(name="Cabin", role="REJECT", type="TEXT")
+
+        payload['features'] = [feature_passengerid, feature_ticket, feature_cabin]
+
+        # To set the cost matrix properly, access the 'metricParams' section of the payload and set the cost matrix weights:
+
+        payload['metricParams'] = dict(costMatrixWeights=dict(tpGain=0.4, fpGain=-1.0, tnGain=0.2, fnGain=-0.5))
+
+        # Save the settings and run the recipe.
+        # Note that with this method, all the settings that were not explicitly set are set to their default values.
+
+        ser_settings.save()
+        new_recipe.run()
+
+    The output model evaluation store must exist. It can be created using the following:
+
+    .. code-block:: python
+
+        evaluation_store_id = project.create_model_evaluation_store("output_model_evaluation").mes_id
+    """
+
+    def __init__(self, name, project):
+        DSSRecipeCreator.__init__(self, 'standalone_evaluation', name, project)
+
+    def with_output_evaluation_store(self, mes_id):
+        """Sets the output model evaluation store"""
+        return self._with_output(mes_id, role="main")
+
+
 class ClusteringScoringRecipeCreator(SingleOutputRecipeCreator):
     """
     Builder for the creation of a new "Clustering scoring" recipe, from an
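Taken together, the three files support an end-to-end flow like the sketch below. All object names are placeholders, and `get_model_comparison` is an assumption (the new helper lives on `DSSModelComparison`, so some comparison handle is needed):

    # 1. The output model evaluation store must exist before the recipe runs
    mes = project.create_model_evaluation_store("output_model_evaluation")

    # 2. Build and run a standalone evaluation recipe on an already-scored dataset
    builder = project.new_recipe("standalone_evaluation")
    builder.with_input("scored_dataset_to_evaluate")
    builder.with_output_evaluation_store(mes.mes_id)
    recipe = builder.create()
    recipe.run()

    # 3. Later, resolve a full model evaluation id (ME-<project>-<mes_id>-<evaluation_id>)
    #    back to a DSSModelEvaluation handle through a comparison
    comparison = project.get_model_comparison("my_comparison")  # assumed getter
    ev = comparison.get_evaluation_like_from_full_id(
        "ME-MYPROJECT-{}-{}".format(mes.mes_id, "ev_001"))  # "ev_001" is a placeholder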
