
Commit 08f3347

updated create_performance_definition signature
1 parent 27c9743 commit 08f3347

3 files changed: +89 −38 lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
@@ -5,6 +5,11 @@ Unreleased
 - public_model task also defines methods mapped to MAS module steps when publishing to MAS.
 - SSL verification can be disable with `SSLREQCERT` environment variable.
 
+**Changes**
+Updated method signature for `create_performance_definition` in Model Manager.
+
+**Bugfixes**
+- register_model task no longer adds `rc` and `msg` variables from MAS to the project variables.
 
 v0.9.6 (2019-07-15)
 -------------------

src/sasctl/services/model_management.py

Lines changed: 69 additions & 37 deletions
@@ -50,7 +50,21 @@ def publish_model(model, destination, name=None, force=False):
     return r
 
 
-def create_performance_definition(model, library_name, table_name, name=None, description=None, outputLibrary=None, cas_server=None):
+def create_performance_definition(model,
+                                  library_name,
+                                  table_prefix,
+                                  name=None,
+                                  description=None,
+                                  monitor_champion=False,
+                                  monitor_challenger=False,
+                                  max_bins=None,
+                                  scoring_required=False,
+                                  all_data=False,
+                                  save_output=True,
+                                  output_library=None,
+                                  autoload_output=False,
+                                  cas_server=None,
+                                  trace=False):
     """Create the performance task definition in the model project to monitor model performance.
 
     Parameters
@@ -59,66 +73,84 @@ def create_performance_definition(model, library_name, table_name, name=None, de
         The name or id of the model, or a dictionary representation of the model.
     library_name : str
         The library containing the input data, default is 'Public'.
-    table_name : str
+    table_prefix : str
         The name used for the performance data.
     name : str
-        The name of the performance task, default is 'Performance'.
+        The name of the performance task.
     description : str
-        The description of the performance task, default is 'Performance monitoring for model' + model.name.
-    cas_server : str
-        The CAS Server for the monitoring task, default is 'cas-shared-default'.
-    championMonitored : bool
+        The description of the performance task, default is 'Performance
+        monitoring for model' + model.name.
+    monitor_champion : bool
         Indicates to monitor the project champion model.
-    challengerMonitored : bool
+    monitor_challenger : bool
         Indicates to monitor challenger models.
-    includeAllData : bool
-        Indicates whether to run a performance job against all the data tables in a library.
-    scoreExecutionRequired : bool
-        Indicates whether the scoring task execution is required. This should be set 'False' if you have provided the scores and 'True' if not.
-    maxBins : int
-        The maximum bins number, default is 10.
-    resultLibrary : str
-        The performance output table library, default is 'ModelPerformanceData'.
-    traceOn : bool
-        Indicates whether to turn on tracing.
-    performanceResultSaved : bool
-        Indicates whether the performance results are saved.
-    loadPerformanceResult : bool
-        Indicates to load performance result data.
-
+    max_bins : int
+        The maximum number of bins. Must be >= 2. Defaults to 10.
+    scoring_required : bool
+        Whether model scoring must be performed on the input data before
+        performance results can be computed. Should be `False` if target
+        values are included in the `table_prefix` tables.
+    all_data : bool
+        Whether to run the performance job against all matching data tables
+        in `library_name` or just the new tables. Defaults to `False`.
+    save_output : bool
+        Whether to save the computed results to a table in `output_library`.
+        Defaults to True.
+    output_library : str
+        Name of a CASLIB where computed results should be saved. Defaults to
+        'ModelPerformanceData'.
+    autoload_output : bool
+        Whether computed results should automatically be re-loaded
+        after a CAS server restart.
+    cas_server : str
+        The CAS Server for the monitoring task, default is 'cas-shared-default'.
+    trace : bool
+        Whether to enable trace messages in the SAS job log when
+        executing the performance definition.
 
     Returns
     -------
-    str
-        Performance task definition schema in JSON format.
+    RestObj
+        The performance task definition schema
 
     """
     from .model_repository import get_model, get_project
 
+    if '_' in table_prefix:
+        raise ValueError("Parameter 'table_prefix' cannot contain underscores."
+                         " Received a value of '%s'." % table_prefix)
+
+    max_bins = 10 if max_bins is None else int(max_bins)
+    if int(max_bins) < 2:
+        raise ValueError("Parameter 'max_bins' must be at least 2. "
+                         "Received a value of '%s'." % max_bins)
+
     model = get_model(model)
     project = get_project(model.projectId)
 
-    # Performance data cannot be captured unless certain project properties have been configured.
+    # Performance data cannot be captured unless certain project properties
+    # have been configured.
     for required in ['targetVariable', 'targetLevel', 'predictionVariable']:
         if getattr(project, required, None) is None:
-            raise ValueError("Project %s must have the '%s' property set." % (project.name, required))
+            raise ValueError("Project %s must have the '%s' property set."
+                             % (project.name, required))
 
     request = {'projectId': project.id,
                'name': name or model.name + ' Performance',
                'modelIds': [model.id],
-               'championMonitored': False,
-               'challengerMonitored': False,
-               'includeAllData': False,
-               'scoreExecutionRequired': False,
-               'maxBins': 10,
-               'resultLibrary': outputLibrary or 'ModelPerformanceData',
-               'traceOn': False,
-               'performanceResultSaved': True,
+               'championMonitored': monitor_champion,
+               'challengerMonitored': monitor_challenger,
+               'maxBins': max_bins,
+               'resultLibrary': output_library or 'ModelPerformanceData',
+               'includeAllData': all_data,
+               'scoreExecutionRequired': scoring_required,
+               'performanceResultSaved': save_output,
+               'loadPerformanceResult': autoload_output,
                'dataLibrary': library_name or 'Public',
-               'loadPerformanceResult': False,
                'description': description or 'Performance definition for model ' + model.name,
                'casServerId': cas_server or 'cas-shared-default',
-               'dataPrefix': table_name
+               'dataPrefix': table_prefix,
+               'traceOn': trace
                }
 
     # If model doesn't specify input/output variables, try to pull from project definition

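For reference, a minimal usage sketch of the updated signature, assuming an active sasctl session; the hostname, credentials, model name, and table prefix below are placeholders and not part of this commit:

    from sasctl import Session
    from sasctl.services import model_management as mm

    # Placeholder connection details -- substitute a real Viya host and credentials.
    with Session('viya.example.com', 'sasdemo', 'password'):
        perf_def = mm.create_performance_definition(
            'My Model',                  # model name, id, or dict representation
            'Public',                    # library containing the performance data
            'MyModelPerf',               # table_prefix -- underscores are rejected
            max_bins=5,                  # must be >= 2; defaults to 10 when None
            monitor_champion=True,
            monitor_challenger=False,
            output_library='ModelPerformanceData',
            trace=False)

        # The service returns a RestObj describing the new performance definition.
        print(perf_def.name)

The new keyword arguments map directly onto fields of the request body: monitor_champion to championMonitored, save_output to performanceResultSaved, autoload_output to loadPerformanceResult, and so on.
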
tests/unit/test_model_management.py

Lines changed: 15 additions & 1 deletion
@@ -54,7 +54,11 @@ def test_create_performance_definition():
     get_project.return_value['targetVariable'] = 'target'
     get_project.return_value['targetLevel'] = 'interval'
     get_project.return_value['predictionVariable'] = 'predicted'
-    _ = mm.create_performance_definition('model', 'TestLibrary', 'TestData')
+    _ = mm.create_performance_definition('model', 'TestLibrary',
+                                         'TestData',
+                                         max_bins=3,
+                                         monitor_challenger=True,
+                                         monitor_champion=True)
 
     assert post.call_count == 1
     url, data = post.call_args
@@ -66,3 +70,13 @@
     assert 'cas-shared-default' == data['json']['casServerId']
     assert data['json']['name'] is not None
     assert data['json']['description'] is not None
+    assert data['json']['maxBins'] == 3
+    assert data['json']['championMonitored'] == True
+    assert data['json']['challengerMonitored'] == True
+
+def test_table_prefix_format():
+    with pytest.raises(ValueError):
+        # Underscores should not be allowed
+        _ = mm.create_performance_definition('model',
+                                             'TestLibrary',
+                                             'invalid_name')

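The new max_bins validation could be exercised the same way as the underscore check above; a hypothetical companion test (not part of this commit) might look like:

    import pytest

    from sasctl.services import model_management as mm


    def test_max_bins_minimum():
        # Hypothetical test: values below 2 should raise before any REST call
        # is made, since the check runs ahead of get_model()/get_project().
        with pytest.raises(ValueError):
            _ = mm.create_performance_definition('model', 'TestLibrary',
                                                 'TestData', max_bins=1)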