Skip to content

Commit 6408d1c

Browse files
committed
bugfix: correctly score Pipeline models
1 parent 3506451 commit 6408d1c

File tree

3 files changed

+47
-2
lines changed

3 files changed

+47
-2
lines changed

CHANGELOG.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,15 @@ Unreleased
33
----------
44
-
55

6+
v1.2.2 (2019-8-21)
7+
------------------
8+
**Bugfixes**
9+
- The `register_model` task now correctly identifies input columns when registering a scikit-learn pipeline.
10+
11+
612
v1.2.1 (2019-8-20)
713
------------------
8-
**Improvements**
14+
**Improvements**
915
- Added the ability for `register_model` to correctly handle CAS tables containing data step
1016
score code.
1117

src/sasctl/utils/pymas/core.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,6 @@ def _build_pymas(obj, func_name=None, input_types=None, array_input=False,
276276
# Run one observation through the model and use the result to
277277
# determine output variables
278278
output = target_func(input_types.head(1))
279-
# output = target_func(input_types.iloc[0, :].values.reshape((1, -1)))
280279
output_vars = ds2_variables(output, output_vars=True)
281280
vars.extend(output_vars)
282281
elif isinstance(input_types, type):

tests/integration/test_pymas.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,35 @@ def sklearn_model(train_data):
6363
return model
6464

6565

66+
@pytest.fixture
67+
def sklearn_pipeline(train_data):
68+
from sklearn.pipeline import Pipeline
69+
from sklearn.ensemble import GradientBoostingClassifier
70+
from sklearn.preprocessing import StandardScaler
71+
from sklearn.impute import SimpleImputer
72+
from sklearn.compose import ColumnTransformer
73+
74+
X, y = train_data
75+
76+
numeric_transformer = Pipeline([
77+
('imputer', SimpleImputer(strategy='median')),
78+
('scaler', StandardScaler())
79+
])
80+
81+
preprocessor = ColumnTransformer([
82+
('num', numeric_transformer, X.columns)
83+
])
84+
85+
pipe = Pipeline([
86+
('preprocess', preprocessor),
87+
('classifier', GradientBoostingClassifier())
88+
])
89+
90+
pipe.fit(X, y)
91+
92+
return pipe
93+
94+
6695
@pytest.fixture
6796
def pickle_file(tmpdir_factory, sklearn_model):
6897
"""Returns the path to a file containing a pickled Scikit-Learn model """
@@ -215,6 +244,17 @@ def test_from_python_file(python_file):
215244
assert isinstance(p, PyMAS)
216245

217246

247+
def test_with_sklearn_pipeline(train_data, sklearn_pipeline):
248+
from sasctl.utils.pymas import PyMAS, from_pickle
249+
250+
X, y = train_data
251+
p = from_pickle(pickle.dumps(sklearn_pipeline),
252+
func_name='predict',
253+
input_types=X)
254+
255+
assert isinstance(p, PyMAS)
256+
assert len(p.variables) > 4 # 4 input features in Iris data set
257+
218258
@pytest.mark.usefixtures('session')
219259
def test_publish_and_execute(tmpdir):
220260
import pickle

0 commit comments

Comments
 (0)