@@ -68,7 +68,7 @@ def writeScoreCode(
68
68
that the model files are being created from an MLFlow model.
69
69
targetDF : DataFrame
70
70
The `DataFrame` object contains the training data for the target variable. Note that
71
- for MLFlow models, this can set as None.
71
+ for MLFlow models, this can be set as None.
72
72
modelPrefix : string
73
73
The variable for the model name that is used when naming model files.
74
74
(For example: hmeqClassTree + [Score.py || .pickle]).
@@ -80,8 +80,16 @@ def writeScoreCode(
80
80
modelFileName : string
81
81
Name of the model file that contains the model.
82
82
metrics : string list, optional
83
- The scoring metrics for the model. The default is a set of two
84
- metrics: EM_EVENTPROBABILITY and EM_CLASSIFICATION.
83
+ The scoring metrics for the model. For classification models, it is assumed that the last value in the list
84
+ represents the classification output. The default is a list of two metrics: EM_EVENTPROBABILITY and
85
+ EM_CLASSIFICATION. The following scenarios are supported:
86
+ 1) If only one value is provided, then it is assumed that the model returns either a binary response
87
+ prediction or a character output, which is returned directly as the output.
88
+ 2) If only two values are provided, a threshold value needs to be set: either by providing a
89
+ threshPrediction argument or the function taking the mean of the provided target column. Then the
90
+ threshold value sets the classification output for the prediction.
91
+ 3) If more than two values are provided, the largest probability is accepted as the event and the
92
+ appropriate classification value is returned for the output.
85
93
pyPath : string, optional
86
94
The local path of the score code file. The default is the current
87
95
working directory.
@@ -462,19 +470,23 @@ def score{modelPrefix}({inputVarList}):
462
470
)
463
471
)
464
472
if not isH2OModel and not isMLFlow :
465
- cls .pyFile .write (
466
- """\n
473
+ # TODO: Refactor arguments to better handle different classification types
474
+ if len (metrics ) == 1 :
475
+ # For models that output the classification from the prediction
476
+ cls .pyFile .write (
477
+ """\n
478
+ {metric} = prediction""" .format (metric = metrics [0 ]))
479
+ elif len (metrics ) == 2 :
480
+ cls .pyFile .write (
481
+ """\n
467
482
try:
468
483
{metric} = float(prediction)
469
484
except TypeError:
470
- # If the model expects non-binary responses, a TypeError will be raised.
471
- # The except block shifts the prediction to accept a non-binary response.
472
- {metric} = float(prediction[:,1])""" .format (
473
- metric = metrics [0 ]
474
- )
475
- )
476
- if threshPrediction is None :
477
- threshPrediction = np .mean (targetDF )
485
+ # If the prediction returns as a list of values or improper value type, a TypeError will be raised.
486
+ # Attempt to handle the prediction output in the except block.
487
+ {metric} = float(prediction[0])""" .format (metric = metrics [0 ]))
488
+ if threshPrediction is None :
489
+ threshPrediction = np .mean (targetDF )
478
490
cls .pyFile .write (
479
491
"""\n
480
492
if ({metric0} >= {threshold}):
@@ -486,6 +498,21 @@ def score{modelPrefix}({inputVarList}):
486
498
threshold = threshPrediction ,
487
499
)
488
500
)
501
+ elif len (metrics ) > 2 :
502
+ for i , metric in enumerate (metrics [:- 1 ]):
503
+ cls .pyFile .write (
504
+ """\n
505
+ {metric} = float(prediction[{i}])""" .format (metric = metric , i = i )
506
+ )
507
+ cls .pyFile .write (
508
+ """\n
509
+ max_prediction = max({metric_list})
510
+ index_prediction = {metric_list}.index(max_prediction)
511
+ {classification} = index_prediction""" .format (metric_list = "[{}]" .format (", " .join (metrics [:- 1 ])), classification = metrics [- 1 ])
512
+ )
513
+ else :
514
+ raise ValueError ("Improper metrics argument was provided. Please provide a list of string metrics." )
515
+
489
516
elif isH2OModel and not isMLFlow :
490
517
cls .pyFile .write (
491
518
"""\n
0 commit comments