
Commit 3e0eb00

Merge pull request #205 from dataiku/feature/mes-mc-10.0.3
MES / MC updates
2 parents: 7ddb29c + 775b9d8

File tree: 3 files changed, +126 −3 lines changed


dataikuapi/dss/modelcomparison.py

Lines changed: 31 additions & 0 deletions
@@ -1,4 +1,5 @@
 from dataikuapi.dss.discussion import DSSObjectDiscussions
+import re
 
 
 class DSSModelComparison(object):
@@ -36,6 +37,36 @@ def get_object_discussions(self):
         """
         return DSSObjectDiscussions(self.client, self.project_key, "MODEL_COMPARISON", self.mec_id)
 
+    def get_evaluation_like_from_full_id(self, full_id):
+        """
+        Retrieves a Saved Model from the flow, a Lab Model from an Analysis, or a Model Evaluation from a Model Evaluation Store, using its full id.
+
+        :param string full_id: the full id of the item to retrieve
+
+        :returns: A handle on the Saved Model, the Model Evaluation or the Lab Model
+        :rtype: :class:`dataikuapi.dss.savedmodel.DSSSavedModel`
+        :rtype: :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluation`
+        :rtype: :class:`dataikuapi.dss.ml.DSSTrainedPredictionModelDetails`
+        """
+
+        saved_model_pattern = re.compile("^S-(\\w+)-(\\w+)-(\\w+)(?:-part-(\\w+)-(v?\\d+))?$\\Z")
+        analysis_model_pattern = re.compile("^A-(\\w+)-(\\w+)-(\\w+)-(s[0-9]+)-(pp[0-9]+(?:-part-(\\w+)|-base)?)-(m[0-9]+)$\\Z")
+        model_evaluation_pattern = re.compile("^ME-(\\w+)-(\\w+)-(\\w+)$\\Z")
+
+        if saved_model_pattern.match(full_id):
+            return self.project.get_saved_model(full_id)
+        elif model_evaluation_pattern.match(full_id):
+            mes_id = full_id.split('-')[2]
+            evaluation_id = full_id.split('-')[3]
+            mes = self.project.get_model_evaluation_store(mes_id)
+            return mes.get_model_evaluation(evaluation_id)
+        elif analysis_model_pattern.match(full_id):
+            analysis_id = full_id.split('-')[2]
+            task_id = full_id.split('-')[3]
+            return self.project.get_ml_task(analysis_id, task_id).get_trained_model_details(full_id)
+
+        raise ValueError("{} is not a valid full model id or full model evaluation id.".format(full_id))
+
     ########################################################
     # Deletion
     ########################################################
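For illustration, a minimal usage sketch of the new helper. The comparison id and full ids below are hypothetical placeholders that follow the S-/ME-/A- patterns matched above, and `project.get_model_comparison` is assumed to be the usual handle getter:

    # Hypothetical ids; each branch returns the handle type documented in the docstring
    comparison = project.get_model_comparison("my_comparison")  # assumed getter

    # Saved model full id -> DSSSavedModel
    sm = comparison.get_evaluation_like_from_full_id("S-MYPROJECT-aBcDeF12-XyZ98765")

    # Model evaluation full id -> DSSModelEvaluation
    # (store id and evaluation id are parsed out of the id)
    ev = comparison.get_evaluation_like_from_full_id("ME-MYPROJECT-sToRe123-EvAl4567")

    # Lab (analysis) model full id -> DSSTrainedPredictionModelDetails
    lab = comparison.get_evaluation_like_from_full_id("A-MYPROJECT-AnAlYsIs-MlTaSk12-s1-pp1-m1")

    # Any other format raises ValueError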

dataikuapi/dss/project.py

Lines changed: 2 additions & 0 deletions
@@ -1404,6 +1404,8 @@ def new_recipe(self, type, name=None):
             return recipe.PredictionScoringRecipeCreator(name, self)
         elif type == "evaluation":
             return recipe.EvaluationRecipeCreator(name, self)
+        elif type == "standalone_evaluation":
+            return recipe.StandaloneEvaluationRecipeCreator(name, self)
         elif type == "clustering_scoring":
             return recipe.ClusteringScoringRecipeCreator(name, self)
         elif type == "download":
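With this dispatch in place, requesting the new type from `new_recipe` hands back the `StandaloneEvaluationRecipeCreator` added below in recipe.py; a two-line sketch with a placeholder project key:

    project = client.get_project("MYPROJECT")  # placeholder project key
    builder = project.new_recipe("standalone_evaluation")  # returns a StandaloneEvaluationRecipeCreator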

dataikuapi/dss/recipe.py

Lines changed: 93 additions & 3 deletions
@@ -1328,10 +1328,10 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
 
     .. code-block:: python
 
-        # Create a new prediction scoring recipe outputing to a new dataset
+        # Create a new evaluation recipe outputting to a new dataset, to a metrics dataset and/or to a model evaluation store
 
         project = client.get_project("MYPROJECT")
-        builder = EvaluationRecipeCreator("my_scoring_recipe", project)
+        builder = project.new_recipe("evaluation")
         builder.with_input_model(saved_model_id)
         builder.with_input("dataset_to_evaluate")
@@ -1340,7 +1340,28 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
         builder.with_output_evaluation_store(evaluation_store_id)
 
         new_recipe = builder.build()
-
+
+        # Access the settings
+
+        er_settings = new_recipe.get_settings()
+        payload = er_settings.obj_payload
+
+        # Change the settings
+
+        payload['dontComputePerformance'] = True
+        payload['outputProbabilities'] = False
+        payload['metrics'] = ["precision", "recall", "auc", "f1", "costMatrixGain"]
+
+        # Manage evaluation labels
+
+        payload['labels'] = [dict(key="label_1", value="value_1"), dict(key="label_2", value="value_2")]
+
+        # Save the settings and run the recipe
+
+        er_settings.save()
+        new_recipe.run()
+
     Outputs must exist. They can be created using the following:
 
     .. code-block:: python
@@ -1376,6 +1397,75 @@ def with_output_evaluation_store(self, mes_id):
         return self._with_output(mes_id, role="evaluationStore")
 
 
+class StandaloneEvaluationRecipeCreator(DSSRecipeCreator):
+    """
+    Builder for the creation of a new "Standalone Evaluate" recipe, from an
+    input dataset
+
+    .. code-block:: python
+
+        # Create a new standalone evaluation of a scored dataset
+
+        project = client.get_project("MYPROJECT")
+        builder = project.new_recipe("standalone_evaluation")
+        builder.with_input("scored_dataset_to_evaluate")
+        builder.with_output_evaluation_store(evaluation_store_id)
+
+        new_recipe = builder.create()
+
+        # Modify the model parameters in the SER settings
+
+        ser_settings = new_recipe.get_settings()
+        payload = ser_settings.obj_payload
+
+        payload['predictionType'] = "BINARY_CLASSIFICATION"
+        payload['targetVariable'] = "Survived"
+        payload['predictionVariable'] = "prediction"
+        payload['isProbaAware'] = True
+        payload['dontComputePerformance'] = False
+
+        # For a classification model with probabilities, the 'probas' section can be filled with the mapping of the classes and their probability columns,
+        # e.g. for a binary classification model with 2 columns: proba_0 and proba_1
+
+        class_0 = dict(key=0, value="proba_0")
+        class_1 = dict(key=1, value="proba_1")
+        payload['probas'] = [class_0, class_1]
+
+        # Change the 'features' settings for this standalone evaluation,
+        # e.g. reject the features that you do not want to use in the evaluation
+
+        feature_passengerid = dict(name="Passenger_Id", role="REJECT", type="TEXT")
+        feature_ticket = dict(name="Ticket", role="REJECT", type="TEXT")
+        feature_cabin = dict(name="Cabin", role="REJECT", type="TEXT")
+
+        payload['features'] = [feature_passengerid, feature_ticket, feature_cabin]
+
+        # To set the cost matrix properly, access the 'metricParams' section of the payload and set the cost matrix weights:
+
+        payload['metricParams'] = dict(costMatrixWeights=dict(tpGain=0.4, fpGain=-1.0, tnGain=0.2, fnGain=-0.5))
+
+        # Save the settings and run the recipe.
+        # Note that with this method, all the settings that were not explicitly set are set to their default values.
+
+        ser_settings.save()
+        new_recipe.run()
+
+    The output model evaluation store must exist. It can be created using the following:
+
+    .. code-block:: python
+
+        evaluation_store_id = project.create_model_evaluation_store("output_model_evaluation").mes_id
+    """
+
+    def __init__(self, name, project):
+        DSSRecipeCreator.__init__(self, 'standalone_evaluation', name, project)
+
+    def with_output_evaluation_store(self, mes_id):
+        """Sets the output model evaluation store"""
+        return self._with_output(mes_id, role="main")
+
+
 class ClusteringScoringRecipeCreator(SingleOutputRecipeCreator):
     """
     Builder for the creation of a new "Clustering scoring" recipe, from an
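Taken together, the three files support an end-to-end flow like the sketch below. All object names are placeholders, and `get_model_comparison` is an assumption (the new helper lives on `DSSModelComparison`, so some comparison handle is needed):

    # 1. The output model evaluation store must exist before the recipe runs
    mes = project.create_model_evaluation_store("output_model_evaluation")

    # 2. Build and run a standalone evaluation recipe on an already-scored dataset
    builder = project.new_recipe("standalone_evaluation")
    builder.with_input("scored_dataset_to_evaluate")
    builder.with_output_evaluation_store(mes.mes_id)
    recipe = builder.create()
    recipe.run()

    # 3. Later, resolve a full model evaluation id (ME-<project>-<mes_id>-<evaluation_id>)
    #    back to a DSSModelEvaluation handle through a comparison
    comparison = project.get_model_comparison("my_comparison")  # assumed getter
    ev = comparison.get_evaluation_like_from_full_id(
        "ME-MYPROJECT-{}-{}".format(mes.mes_id, "ev_001"))  # "ev_001" is a placeholder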
