Skip to content

Commit c95e7ce

Browse files
lpenetalariveValentin Thoreyinstanceofmefcandela-dku
authored
MLflow for DSS 10.0.3 (#206)
* Support DSSDataset or dataiku.Dataset in MLFlowVersionHandler.evaluate (#189) * Support DSSDataset or dataiku.Dataset in MLFlowVersionHandler.evaluate * Adjust a comment * upload_folder function for managed folders (#186) * upload_folder function for managed folders * Add import from managed folder for mlflow * Improve doc mlflow * Improve doc mlflow * Add possibility to load mlflow version from managed_folder object * Improve upload_folder utils function * Rename smartFolderId into folderRef * Remove useless argument check * Update documentation Co-authored-by: Adrien Lavoillotte <[email protected]> * Add inline comment Co-authored-by: Adrien Lavoillotte <[email protected]> Co-authored-by: Adrien Lavoillotte <[email protected]> * Check server side that a MLflow saved model name is not empty when creating or updating * Simplify and reword doc of upload_folder * Turn DSSDataset & dataiku.Dataset in :class:`DSSDataset` and :class:`dataiku.Dataset` Co-authored-by: Aurélien Larive <[email protected]> Co-authored-by: Valentin Thorey <[email protected]> Co-authored-by: Adrien Lavoillotte <[email protected]> Co-authored-by: Ferran Candela <[email protected]> Co-authored-by: Ferran Candela <[email protected]>
1 parent 8f166af commit c95e7ce

File tree

3 files changed

+53
-7
lines changed

3 files changed

+53
-7
lines changed

dataikuapi/dss/managedfolder.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from ..utils import DataikuUTF8CSVReader
33
from ..utils import DataikuStreamedHttpUTF8CSVReader
44
import json
5+
import os
56
from requests import utils
67
from .metrics import ComputedMetrics
78
from .future import DSSFuture
@@ -99,7 +100,7 @@ def delete_file(self, path):
99100
def put_file(self, path, f):
100101
"""
101102
Upload the file to the managed folder
102-
103+
103104
Args:
104105
f: the file contents, as a stream
105106
path: the path of the file
@@ -108,6 +109,22 @@ def put_file(self, path, f):
108109
"POST", "/projects/%s/managedfolders/%s/contents/%s" % (self.project_key, self.odb_id, utils.quote(path)),
109110
"", f)
110111

112+
def upload_folder(self, path, folder):
    """
    Upload the content of a local folder at path in the managed folder.

    Note: upload_folder("target", "source") will result in "target" containing the content
    of "source", not in "target" containing "source".

    Every file found under ``folder`` (recursively) is opened in binary mode and sent
    with :meth:`put_file`. Directories that contain no file are not created remotely.

    :param str path: the destination path of the folder in the managed folder
    :param str folder: path (absolute or relative) of the source folder to upload
    """
    import posixpath

    for root, _, files in os.walk(folder):
        for file in files:
            filename = os.path.join(root, file)
            # The destination ends up in the request URL (see put_file), so it must use "/"
            # separators whatever the local os.sep is (e.g. "\\" on Windows).
            relative_path = os.path.relpath(filename, folder).replace(os.sep, "/")
            with open(filename, "rb") as f:
                self.put_file(posixpath.join(path, relative_path), f)
127+
111128
########################################################
112129
# Managed folder actions
113130
########################################################

dataikuapi/dss/project.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -725,8 +725,6 @@ def create_mlflow_pyfunc_model(self, name, prediction_type = None):
725725
:param string name: Human readable name for the new saved model in the flow
726726
:param string prediction_type: Optional (but needed for most operations). One of BINARY_CLASSIFICATION, MULTICLASS or REGRESSION
727727
"""
728-
if not name:
729-
raise ValueError("name can not be empty")
730728
model = {
731729
"savedModelType" : "MLFLOW_PYFUNC",
732730
"predictionType" : prediction_type,

dataikuapi/dss/savedmodel.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .ml import DSSMLTask
55
from .ml import DSSTrainedClusteringModelDetails
66
from .ml import DSSTrainedPredictionModelDetails
7+
from .managedfolder import DSSManagedFolder
78

89
from ..utils import _make_zipfile
910

@@ -135,7 +136,6 @@ def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHER
135136
:return: a :class:`MLFlowVersionHandler` in order to interact with the new MLFlow model version
136137
"""
137138
# TODO: Add a check that it's indeed a MLFlow model folder
138-
# TODO: cleanup the archive
139139
import shutil
140140
import os
141141

@@ -150,6 +150,36 @@ def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHER
150150
finally:
151151
shutil.rmtree(archive_temp_dir)
152152

153+
def import_mlflow_version_from_managed_folder(self, version_id, managed_folder, path, code_env_name="INHERIT"):
    """
    Create a new version for this saved model from a path containing a MLFlow model in a managed folder.

    Requires the saved model to have been created using :meth:`dataikuapi.dss.project.DSSProject.create_mlflow_pyfunc_model`.

    :param str version_id: Identifier of the version to create
    :param managed_folder: Identifier of the managed folder, or a
                           :class:`dataikuapi.dss.managedfolder.DSSManagedFolder` (in which case the
                           reference sent to the backend is "<project_key>.<id>" of that folder)
    :type managed_folder: str or :class:`dataikuapi.dss.managedfolder.DSSManagedFolder`
    :param str path: Path of the MLflow folder in the managed folder
    :param str code_env_name: Name of the code env to use for this model version. The code env must contain at least
                              mlflow and the package(s) corresponding to the used MLFlow-compatible frameworks.
                              If value is "INHERIT", the default active code env of the project will be used
    :return: a :class:`MLFlowVersionHandler` in order to interact with the new MLFlow model version
    """
    # TODO: Add a check that it's indeed a MLFlow model folder
    # isinstance (rather than an exact type comparison) so that subclasses of DSSManagedFolder are accepted too
    if isinstance(managed_folder, DSSManagedFolder):
        folder_ref = "{}.{}".format(managed_folder.project_key, managed_folder.id)
    else:
        folder_ref = managed_folder

    self.client._perform_empty(
        "POST", "/projects/{project_id}/savedmodels/{saved_model_id}/versions/{version_id}".format(
            project_id=self.project_key, saved_model_id=self.sm_id, version_id=version_id
        ),
        # codeEnvName goes through params, like the other query arguments, so that it gets URL-encoded
        params={"codeEnvName": code_env_name, "folderRef": folder_ref, "path": path},
        files={"file": (None, None)}  # required for backend-mandated multipart request
    )
    return self.get_mlflow_version_handler(version_id)
182+
153183
def get_mlflow_version_handler(self, version_id):
154184
"""
155185
Returns a :class:`MLFlowVersionHandler` to interact with a MLFlow model version
@@ -315,9 +345,10 @@ def evaluate(self, dataset_ref):
315345
316346
:meth:`set_core_metadata` must be called before you can evaluate a dataset
317347
318-
:param str dataset_ref: Name of the evaluation dataset to use (either a dataset name or "PROJECT.datasetName")
348+
:param str dataset_ref: Evaluation dataset to use (either a dataset name, "PROJECT.datasetName", :class:`DSSDataset` instance or :class:`dataiku.Dataset` instance)
319349
"""
320-
# TODO Add support for handling a DSSDataset or dataiku.Dataset as dataset_ref
350+
if hasattr(dataset_ref, 'name'):
351+
dataset_ref = dataset_ref.name
321352
req = {
322353
"datasetRef" : dataset_ref
323354
}
@@ -348,4 +379,4 @@ def prediction_metrics_settings(self):
348379
def save(self):
    """
    Persist the settings of this saved model.

    Sends the current ``self.settings`` as the body of a PUT request on the saved model's endpoint.
    """
    model = self.saved_model
    endpoint = "/projects/%s/savedmodels/%s" % (model.project_key, model.sm_id)
    model.client._perform_empty("PUT", endpoint, body=self.settings)

0 commit comments

Comments
 (0)