bugfix: correctly score Pipeline models

jlwalke2 · jlwalke2 · commit 6408d1c55116 · 2019-08-21T10:18:52.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,9 +3,15 @@ Unreleased
 ----------
  - 
 
+v1.2.2 (2019-8-21)
+------------------
+**Bugfixes**
+ - The `register_model` task now correctly identifies columns when registering a scikit-learn pipeline.
+ 
+
 v1.2.1 (2019-8-20)
 ------------------
- **Improvements**
+**Improvements**
  - Added the ability for `register_model` to correctly handle CAS tables containing data step
  score code.
  
diff --git a/src/sasctl/utils/pymas/core.py b/src/sasctl/utils/pymas/core.py
@@ -276,7 +276,6 @@ def _build_pymas(obj, func_name=None, input_types=None, array_input=False,
         # Run one observation through the model and use the result to
         # determine output variables
         output = target_func(input_types.head(1))
-        # output = target_func(input_types.iloc[0, :].values.reshape((1, -1)))
         output_vars = ds2_variables(output, output_vars=True)
         vars.extend(output_vars)
     elif isinstance(input_types, type):
diff --git a/tests/integration/test_pymas.py b/tests/integration/test_pymas.py
@@ -63,6 +63,35 @@ def sklearn_model(train_data):
     return model
 
 
+@pytest.fixture
+def sklearn_pipeline(train_data):  # fixture: returns a fitted Pipeline
+    from sklearn.pipeline import Pipeline
+    from sklearn.ensemble import GradientBoostingClassifier
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.impute import SimpleImputer
+    from sklearn.compose import ColumnTransformer
+
+    X, y = train_data  # X must expose `.columns` (used below)
+
+    numeric_transformer = Pipeline([  # median-impute, then standardize
+        ('imputer', SimpleImputer(strategy='median')),
+        ('scaler', StandardScaler())
+    ])
+
+    preprocessor = ColumnTransformer([
+        ('num', numeric_transformer, X.columns)  # applied to all columns of X
+    ])
+
+    pipe = Pipeline([  # preprocessing step followed by the classifier
+        ('preprocess', preprocessor),
+        ('classifier', GradientBoostingClassifier())
+    ])
+
+    pipe.fit(X, y)
+
+    return pipe
+
+
 @pytest.fixture
 def pickle_file(tmpdir_factory, sklearn_model):
     """Returns the path to a file containing a pickled Scikit-Learn model """
@@ -215,6 +244,17 @@ def test_from_python_file(python_file):
     assert isinstance(p, PyMAS)
 
 
+def test_with_sklearn_pipeline(train_data, sklearn_pipeline):
+    from sasctl.utils.pymas import PyMAS, from_pickle
+
+    X, y = train_data  # only X is used here; it is passed as `input_types`
+    p = from_pickle(pickle.dumps(sklearn_pipeline),  # pickled fitted pipeline
+                    func_name='predict',
+                    input_types=X)
+
+    assert isinstance(p, PyMAS)
+    # Next line: more variables than the 4 Iris input features are expected.
+    assert len(p.variables) > 4  # 4 input features in Iris data set
+
 @pytest.mark.usefixtures('session')
 def test_publish_and_execute(tmpdir):
     import pickle