Skip to content

Commit 28761bb

Browse files
committed
update_model_performance task
1 parent 4022d8c commit 28761bb

7 files changed: 4887 additions and 14 deletions.

src/sasctl/tasks.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,7 @@ def submit_request():
434434
return module
435435

436436

437-
def update_performance(data, model, label, refresh=True):
437+
def update_model_performance(data, model, label, refresh=True):
438438
"""Upload data for calculating model performance metrics.
439439
440440
Model performance and data distributions can be tracked over time by

tests/cassettes/tests.integration.test_tasks.TestSklearnLinearModel.test_create_performance_definition.json

Lines changed: 722 additions & 0 deletions
Large diffs are not rendered by default.

tests/cassettes/tests.integration.test_tasks.TestSklearnLinearModel.test_register_model.json

Lines changed: 1238 additions & 0 deletions
Large diffs are not rendered by default.

tests/cassettes/tests.integration.test_tasks.TestSklearnLinearModel.test_update_model_performance.json

Lines changed: 1813 additions & 0 deletions
Large diffs are not rendered by default.

tests/cassettes/tests.integration.test_tasks.TestSklearnLinearModel.test_update_model_performance_swat.json

Lines changed: 1004 additions & 0 deletions
Large diffs are not rendered by default.

tests/integration/test_tasks.py

Lines changed: 105 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,10 @@
1919

2020

2121
@pytest.fixture
22-
def sklearn_model():
23-
"""Returns a simple Scikit-Learn model """
22+
def sklearn_logistic_model():
23+
"""A Scikit-Learn logistic regression fit to Iris data set."""
2424

25-
try:
26-
import pandas as pd
27-
except ImportError:
28-
pytest.skip('Package `pandas` not found.')
25+
pd = pytest.importorskip('pandas')
2926

3027
try:
3128
from sklearn import datasets
@@ -48,6 +45,27 @@ def sklearn_model():
4845
return model, iris.iloc[:, 0:4]
4946

5047

48+
@pytest.fixture
49+
def sklearn_linear_model():
50+
"""A Scikit-Learn linear regression fit to Boston housing data."""
51+
52+
pd = pytest.importorskip('pandas')
53+
datasets = pytest.importorskip('sklearn.datasets')
54+
linear_model = pytest.importorskip('sklearn.linear_model')
55+
56+
data = datasets.load_boston()
57+
X = pd.DataFrame(data.data, columns=data.feature_names)
58+
y = pd.DataFrame(data.target, columns=['Price'])
59+
60+
with warnings.catch_warnings():
61+
warnings.simplefilter('ignore')
62+
lm = linear_model.LinearRegression()
63+
lm.fit(X, y)
64+
65+
return lm, X, y
66+
67+
68+
5169
@pytest.mark.incremental
5270
class TestModels:
5371
def test_register_astore(self, astore):
@@ -61,11 +79,11 @@ def test_register_astore(self, astore):
6179
assert isinstance(model, RestObj)
6280
assert ASTORE_MODEL_NAME == model.name
6381

64-
def test_register_sklearn(self, sklearn_model):
82+
def test_register_sklearn(self, sklearn_logistic_model):
6583
from sasctl.tasks import register_model
6684
from sasctl import RestObj
6785

68-
sk_model, train_df = sklearn_model
86+
sk_model, train_df = sklearn_logistic_model
6987

7088
# Register model and ensure attributes are set correctly
7189
model = register_model(sk_model, SCIKIT_MODEL_NAME,
@@ -132,7 +150,6 @@ def test_publish_sklearn_again(self):
132150
# MAS module should automatically have methods bound
133151
assert callable(p.score)
134152

135-
136153
def test_score_sklearn(self):
137154
from sasctl.services import microanalytic_score as mas
138155

@@ -141,3 +158,82 @@ def test_score_sklearn(self):
141158
r = m.score(sepalwidth=1, sepallength=2, petallength=3, petalwidth=4)
142159
assert r == 'virginica'
143160

161+
162+
@pytest.mark.incremental
163+
class TestSklearnLinearModel:
164+
MODEL_NAME = 'Scikit Linear Model'
165+
PROJECT_NAME = 'Boston Housing'
166+
167+
def test_register_model(self, sklearn_linear_model):
168+
from sasctl.tasks import register_model
169+
from sasctl import RestObj
170+
171+
sk_model, X, y = sklearn_linear_model
172+
173+
# Register model and ensure attributes are set correctly
174+
model = register_model(sk_model,
175+
self.MODEL_NAME,
176+
project=self.PROJECT_NAME,
177+
input=X,
178+
force=True)
179+
180+
assert isinstance(model, RestObj)
181+
assert self.MODEL_NAME == model.name
182+
assert 'Prediction' == model.function
183+
assert 'Linear regression' == model.algorithm
184+
assert 'Python' == model.trainCodeType
185+
assert 'ds2MultiType' == model.scoreCodeType
186+
187+
assert len(model.inputVariables) == 13
188+
assert len(model.outputVariables) == 1
189+
190+
# Don't compare to sys.version since cassettes used may have been
191+
# created by a different version
192+
assert re.match('Python \d\.\d', model.tool)
193+
194+
# Ensure input & output metadata was set
195+
for col in X.columns:
196+
assert 1 == len([v for v in model.inputVariables
197+
+ model.outputVariables if v.get('name') == col])
198+
199+
# Ensure model files were created
200+
from sasctl.services import model_repository as mr
201+
files = mr.get_model_contents(model)
202+
filenames = [f.name for f in files]
203+
assert 'model.pkl' in filenames
204+
assert 'dmcas_epscorecode.sas' in filenames
205+
assert 'dmcas_packagescorecode.sas' in filenames
206+
207+
def test_create_performance_definition(self, sklearn_linear_model):
208+
from sasctl.services import model_repository as mr
209+
from sasctl.services import model_management as mm
210+
211+
lm, X, y = sklearn_linear_model
212+
213+
project = mr.get_project(self.PROJECT_NAME)
214+
# Update project properties
215+
project['function'] = 'prediction'
216+
project['targetLevel'] = 'interval'
217+
project['targetVariable'] = 'Price'
218+
project['predictionVariable'] = 'var1'
219+
project = mr.update_project(project)
220+
221+
mm.create_performance_definition(self.MODEL_NAME, 'Public', 'boston')
222+
223+
def test_update_model_performance(self, sklearn_linear_model, cas_session):
224+
from six.moves import mock
225+
from sasctl.tasks import update_model_performance
226+
227+
lm, X, y = sklearn_linear_model
228+
229+
# Score & set output var
230+
train_df = X.copy()
231+
train_df['var1'] = lm.predict(X)
232+
train_df['Price'] = y
233+
234+
with mock.patch('swat.CAS') as CAS:
235+
for period in ('q12019', 'q22019', 'q32019', 'q42019'):
236+
sample = train_df.sample(frac=0.1)
237+
update_model_performance(sample, self.MODEL_NAME, period)
238+
239+

tests/unit/test_tasks.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def test_parse_module_url():
7575

7676

7777
def test_save_performance_project_types():
78-
from sasctl.tasks import update_performance
78+
from sasctl.tasks import update_model_performance
7979

8080
with mock.patch('sasctl._services.model_repository.ModelRepository''.get_model') as model:
8181
with mock.patch('sasctl._services.model_repository.ModelRepository.get_project') as project:
@@ -84,17 +84,17 @@ def test_save_performance_project_types():
8484
# Function is required
8585
with pytest.raises(ValueError):
8686
project.return_value = {}
87-
update_performance(None, None, None)
87+
update_model_performance(None, None, None)
8888

8989
# Target Level is required
9090
with pytest.raises(ValueError):
9191
project.return_value = {'function': 'Prediction'}
92-
update_performance(None, None, None)
92+
update_model_performance(None, None, None)
9393

9494
# Prediction variable required
9595
with pytest.raises(ValueError):
9696
project.return_value = {'function': 'Prediction',
9797
'targetLevel': 'Binary'}
98-
update_performance(None, None, None)
98+
update_model_performance(None, None, None)
9999

100100
# Check projects w/ invalid properties

0 commit comments

Comments (0)