@@ -14,15 +14,13 @@
 import re
 import sys
 import warnings
-from tempfile import TemporaryDirectory
-
-import pandas as pd
 
 try:
     import swat
 except ImportError:
     swat = None
 
+import pandas as pd
 from urllib.error import HTTPError
 
 from . import pzmm, utils
@@ -33,6 +31,6 @@
 from .services import model_repository as mr
 from .utils.pymas import from_pickle
 from .utils.misc import installed_packages
-
+from .utils.model_info import get_model_info
 
 logger = logging.getLogger(__name__)
@@ -47,10 +45,13 @@
 _PROP_NAME_MAXLEN = 60
 
 
+
+
 def _property(k, v):
     return {"name": str(k)[:_PROP_NAME_MAXLEN], "value": str(v)[:_PROP_VALUE_MAXLEN]}
 
 
+
 def _sklearn_to_dict(model):
     # Convert Scikit-learn values to built-in Model Manager values
     mappings = {
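For reference, `_property` exists because Model Manager caps the length of custom property names and values, so everything is coerced to a string and clipped. A minimal sketch of the behavior (the `_PROP_VALUE_MAXLEN` value below is assumed; the real constant is defined elsewhere in this module):

```python
_PROP_NAME_MAXLEN = 60
_PROP_VALUE_MAXLEN = 512  # assumed value; the real constant lives elsewhere in the module


def _property(k, v):
    # Coerce key and value to strings and clip them to Model Manager's limits
    return {"name": str(k)[:_PROP_NAME_MAXLEN], "value": str(v)[:_PROP_VALUE_MAXLEN]}


print(_property("max_depth", 10))            # {'name': 'max_depth', 'value': '10'}
print(len(_property("x" * 100, 1)["name"]))  # 60 -- the name is truncated
```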
@@ -68,6 +69,7 @@ def _sklearn_to_dict(model):
         "regressor": "prediction",
     }
 
+    # If this is a Pipeline, extract the final estimator step
     if hasattr(model, "_final_estimator"):
         estimator = model._final_estimator
     else:
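The `_final_estimator` check above is what lets a fitted `sklearn.pipeline.Pipeline` be registered directly: the Model Manager mapping is applied to the last step rather than to the Pipeline object itself. A quick illustration of the attribute being tested:

```python
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression())])

# A Pipeline exposes its last step as _final_estimator; a bare estimator does not.
print(hasattr(pipe, "_final_estimator"))                  # True
print(type(pipe._final_estimator).__name__)               # LogisticRegression
print(hasattr(LogisticRegression(), "_final_estimator"))  # False
```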
@@ -97,78 +99,67 @@ def _sklearn_to_dict(model):
         trainCodeType="Python",
         targetLevel=target_level,
         function=analytic_function,
-        tool="Python %s.%s" % (sys.version_info.major, sys.version_info.minor),
+        tool=f"Python {sys.version_info.major}.{sys.version_info.minor}",
         properties=[_property(k, v) for k, v in model.get_params().items()],
     )
 
     return result
 
 
-def _register_sklearn_35():
-    pass
-
-
-def _register_sklearn_40(model, model_name, project_name, input_data, output_data=None):
-
-    # TODO: if not sklearn, raise ValueError
+def _register_sklearn_40(model, model_name, project_name, input_data, output_data, overwrite=False):
+    model_info = get_model_info(model, input_data, output_data)
 
-    model_info = _sklearn_to_dict(model)
+    # TODO: allow passing description in register_model()
 
-    with TemporaryDirectory() as folder:
+    # Will store filename: file contents as we generate files
+    files = {}
 
-        # Write model to a pickle file
-        pzmm.PickleModel.pickle_trained_model(model, model_name, folder)  # generates folder/name.pickle
+    # Write model to a pickle file
+    files.update(pzmm.PickleModel.pickle_trained_model(model, model_name))
 
-        # Create a JSON file containing model input fields
-        pzmm.JSONFiles.write_var_json(input_data, is_input=True, json_path=folder)
+    # Create JSON files containing the model input and output fields
+    files.update(pzmm.JSONFiles.write_var_json(input_data))
+    files.update(pzmm.JSONFiles.write_var_json(output_data, is_input=False))
 
-        # Create a JSON file containing model output fields
-        if output_data is not None:
-            if model_info["function"] == "classification":
-                output_fields = output_data.copy()
+    if model_info.is_binary_classifier:
+        num_categories = 2
+    elif model_info.is_classifier:
+        num_categories = len(model_info.target_values)
+    else:
+        num_categories = 0
+
+    files.update(pzmm.JSONFiles.write_model_properties_json(model_name,
+                                                            target_variable=model_info.output_column_names,
+                                                            target_event=model_info.target_values,
+                                                            num_target_categories=num_categories,
+                                                            event_prob_var=None,
+                                                            model_desc=model_info.description[:_DESC_MAXLEN],
+                                                            model_function=model_info.analytic_function,
+                                                            model_type=model_info.algorithm
+                                                            ))
+    """
+    target_variable : string
+        Target variable to be predicted by the model.
+    target_event : string
+        Model target event. For example: 1 for a binary event.
+    num_target_categories : int
+        Number of possible target categories. For example: 2 for a binary event.
+    event_prob_var : string, optional
+        User-provided output event probability variable. This value should match the
+        value in outputVar.json. Default is "P_" + target_variable + target_event.
+    """
+    files.update(pzmm.JSONFiles.write_file_metadata_json(model_name))
 
-            if hasattr(output_fields, "columns"):
-                output_fields.columns = ["EM_CLASSIFICATION"]
-            else:
-                output_fields.name = "EM_CLASSIFICATION"
-            pzmm.JSONFiles.write_var_json(output_fields, is_input=False, json_path=folder)
-        else:
-            pzmm.JSONFiles.write_var_json(output_data, is_input=False, json_path=folder)
-        # target_variable
-        # target_event (e.g. 1 for binary)
-        # num_target_event
-        # event_prob
-
-        # TODO: allow passing description in register_model()
-
-        pzmm.JSONFiles.write_model_properties_json(model_name,
-                                                   target_event=None,
-                                                   target_variable=None,
-                                                   num_target_categories=1,
-                                                   model_desc=model_info["description"],
-                                                   model_function=model_info["function"],
-                                                   model_type=model_info["algorithm"],
-                                                   json_path=folder
-                                                   )
-
-        pzmm.JSONFiles.write_file_metadata_json(model_name, json_path=folder, is_h2o_model=False)
-
-        predict_method = (
-            "{}.predict_proba({})"
-            if hasattr(model, "predict_proba")
-            else "{}.predict({})"
-        )
-        predict_method = "{}.predict({})"
-        metrics = ["EM_CLASSIFICATION"]  # NOTE: only valid for classification models.
-        pzmm.ImportModel.import_model(
-            folder,
-            model_name,
-            project_name,
-            input_data,
-            output_data,
-            predict_method,
-            metrics=metrics,
-        )
+    # TODO: How to determine whether to call .predict() or .predict_proba()? Base on output data?
+    pzmm.ImportModel.import_model(model_files=files,
+                                  model_prefix=model_name,
+                                  project=project_name,
+                                  predict_method=model.predict,
+                                  input_data=input_data,
+                                  output_variables=[],
+                                  score_cas=True,
+                                  missing_values=False  # assuming Pipeline will be used for imputing
+                                  )
 
 
 def _create_project(project_name, model, repo, input_vars=None, output_vars=None):
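The net effect of the rewrite above is that `_register_sklearn_40` no longer round-trips through a `TemporaryDirectory`: each pzmm helper now contributes entries to an in-memory `{filename: contents}` dict that is handed to `ImportModel.import_model` in one call. A minimal sketch of that accumulation pattern, with hypothetical stand-ins for the pzmm calls:

```python
import json
import pickle

import pandas as pd
from sklearn.linear_model import LinearRegression


def make_pickle_file(model, prefix):
    # Hypothetical stand-in for pzmm.PickleModel.pickle_trained_model:
    # returns {filename: contents} instead of writing to disk.
    return {f"{prefix}.pickle": pickle.dumps(model)}


def make_var_json(data, is_input=True):
    # Hypothetical stand-in for pzmm.JSONFiles.write_var_json.
    name = "inputVar.json" if is_input else "outputVar.json"
    return {name: json.dumps([{"name": c} for c in data.columns])}


X = pd.DataFrame({"x1": [1.0, 2.0], "x2": [3.0, 4.0]})
y = pd.DataFrame({"prediction": [0.5, 1.5]})
model = LinearRegression().fit(X, y)

# Each helper returns a dict, so the files accumulate without touching disk.
files = {}
files.update(make_pickle_file(model, "MyModel"))
files.update(make_var_json(X))
files.update(make_var_json(y, is_input=False))
print(sorted(files))  # ['MyModel.pickle', 'inputVar.json', 'outputVar.json']
```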
@@ -275,6 +266,8 @@ def register_model(
         information. If a single type is provided, all columns will be assumed
         to be that type, otherwise a list of column types or a dictionary of
         column_name: type may be provided.
+    output : array-like
+        A Numpy array or Pandas DataFrame that contains sample output from the model.
     version : {'new', 'latest', int}, optional
         Version number of the project in which the model should be created.
         Defaults to 'new'.
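With the `output` parameter documented above, a call to the public API might look like the following sketch (hostname, credentials, and data are placeholders, and the `output` argument is what this change introduces):

```python
import pandas as pd
from sklearn.linear_model import LogisticRegression

from sasctl import Session, register_model

X = pd.DataFrame({"age": [31, 45, 52], "income": [52000, 61000, 48000]})
y = pd.Series([0, 1, 0], name="default")
model = LogisticRegression().fit(X, y)

# Placeholder connection details.
with Session("example.sas.com", "username", "password"):
    register_model(model, "Default Risk Model", "Risk Project",
                   input=X, output=y, version="new")
```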
@@ -315,8 +308,6 @@ def register_model(
         Update ASTORE handling for ease of use and removal of SAS Viya 4 score code errors
 
     """
-    # TODO: Create new version if model already exists
-
     # If version not specified, default to creating a new version
     version = version or "new"
 
@@ -458,6 +449,7 @@ def register_model(
     # If the model is a scikit-learn model, generate the model dictionary
     # from it and pickle the model for storage
     if all(hasattr(model, attr) for attr in ["_estimator_type", "get_params"]):
+
         # Pickle the model so we can store it
         model_pkl = pickle.dumps(model)
         files.append({"name": "model.pkl", "file": model_pkl, "role": "Python Pickle"})
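For context, each entry appended to `files` here becomes one attachment on the registered model: a file name, the raw contents, and the Model Manager role it plays. A minimal sketch of the shape being built (the estimator is a stand-in):

```python
import pickle

from sklearn.linear_model import LinearRegression

model = LinearRegression().fit([[0.0], [1.0]], [0.0, 1.0])

files = []
model_pkl = pickle.dumps(model)
files.append({"name": "model.pkl", "file": model_pkl, "role": "Python Pickle"})

# One dict per file attached to the model during registration.
print(files[0]["name"], files[0]["role"], len(files[0]["file"]), "bytes")
```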