MRG Sklearn 0.21 compatibility and CI (#752)

amueller · Matthias Feurer · commit e6ee09d6af0c · 2019-07-26T17:39:30.000+02:00
* test against scikit-learn 0.21

* fix call to roc_auc

* added verbose parameter to pipeline in 0.21

* remove no-longer-existent categorical_features parameter

* more pipeline parameter checks

* more imputer replacements

* don't break on dev versions

* typo on roc_auc_score name

* use ordered dicts, avoid nan comparison

* undid weird merge artifact

* add missing file whoops

* flake8

* try fixing import in backport, pep8

* move SimpleImputer to testing module

* don't trust dicts to be ordered

* run CI mostly on 0.21.2

* failed to save lol
diff --git a/.travis.yml b/.travis.yml
@@ -15,10 +15,11 @@ env:
   - TEST_DIR=/tmp/test_dir/
   - MODULE=openml
   matrix:
-  - DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.20.0"
-  - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.20.0"
-  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.0" RUN_FLAKE8="true" SKIP_TESTS="true"
-  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.0" COVERAGE="true" DOCPUSH="true"
+  - DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.21.2"
+  - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2"
+  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true"
+  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true"
+  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2"
   # Checks for older scikit-learn versions (which also don't nicely work with
   # Python3.7)
   - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2"
diff --git a/openml/testing.py b/openml/testing.py
@@ -321,4 +321,10 @@ def _check_fold_timing_evaluations(
                         self.assertLessEqual(evaluation, max_val)
 
 
-__all__ = ['TestBase']
+try:
+    from sklearn.impute import SimpleImputer
+except ImportError:
+    from sklearn.preprocessing import Imputer as SimpleImputer
+
+
+__all__ = ['TestBase', 'SimpleImputer']
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -28,18 +28,15 @@
 import sklearn.tree
 import sklearn.cluster
 
-if LooseVersion(sklearn.__version__) < "0.20":
-    from sklearn.preprocessing import Imputer
-else:
-    from sklearn.impute import SimpleImputer as Imputer
 
 import openml
 from openml.extensions.sklearn import SklearnExtension
 from openml.exceptions import PyOpenMLError
 from openml.flows import OpenMLFlow
 from openml.flows.functions import assert_flows_equal
 from openml.runs.trace import OpenMLRunTrace
-from openml.testing import TestBase
+from openml.testing import TestBase, SimpleImputer
+
 
 this_directory = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(this_directory)
@@ -285,11 +282,14 @@ def test_serialize_pipeline(self):
         # Comparing the pipeline
         # The parameters only have the name of base objects(not the whole flow)
         # as value
-        # memory parameter has been added in 0.19
+        # memory parameter has been added in 0.19, verbose in 0.21
         if LooseVersion(sklearn.__version__) < "0.19":
             self.assertEqual(len(serialization.parameters), 1)
-        else:
+        elif LooseVersion(sklearn.__version__) < "0.21":
             self.assertEqual(len(serialization.parameters), 2)
+        else:
+            self.assertEqual(len(serialization.parameters), 3)
+
         # Hard to compare two representations of a dict due to possibly
         # different sorting. Making a json makes it easier
         self.assertEqual(
@@ -374,8 +374,10 @@ def test_serialize_pipeline_clustering(self):
         # memory parameter has been added in 0.19
         if LooseVersion(sklearn.__version__) < "0.19":
             self.assertEqual(len(serialization.parameters), 1)
-        else:
+        elif LooseVersion(sklearn.__version__) < "0.21":
             self.assertEqual(len(serialization.parameters), 2)
+        else:
+            self.assertEqual(len(serialization.parameters), 3)
         # Hard to compare two representations of a dict due to possibly
         # different sorting. Making a json makes it easier
         self.assertEqual(
@@ -624,7 +626,7 @@ def test_serialize_feature_union_switched_names(self):
             .format(module_name_encoder))
 
     def test_serialize_complex_flow(self):
-        ohe = sklearn.preprocessing.OneHotEncoder(categorical_features=[0])
+        ohe = sklearn.preprocessing.OneHotEncoder()
         scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
         boosting = sklearn.ensemble.AdaBoostClassifier(
             base_estimator=sklearn.tree.DecisionTreeClassifier())
@@ -747,25 +749,26 @@ def test_serialize_simple_parameter_grid(self):
         # Examples from the scikit-learn documentation
         models = [sklearn.svm.SVC(), sklearn.ensemble.RandomForestClassifier()]
         grids = \
-            [[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
-              {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
-               'kernel': ['rbf']}],
-             {"max_depth": [3, None],
-              "max_features": [1, 3, 10],
-              "min_samples_split": [1, 3, 10],
-              "min_samples_leaf": [1, 3, 10],
-              "bootstrap": [True, False],
-              "criterion": ["gini", "entropy"]}]
+            [[OrderedDict([('C', [1, 10, 100, 1000]), ('kernel', ['linear'])]),
+              OrderedDict([('C', [1, 10, 100, 1000]), ('gamma', [0.001, 0.0001]),
+                           ('kernel', ['rbf'])])],
+             OrderedDict([("bootstrap", [True, False]),
+                          ("criterion", ["gini", "entropy"]),
+                          ("max_depth", [3, None]),
+                          ("max_features", [1, 3, 10]),
+                          ("min_samples_leaf", [1, 3, 10]),
+                          ("min_samples_split", [1, 3, 10])
+                          ])]
 
         for grid, model in zip(grids, models):
             serialized = self.extension.model_to_flow(grid)
             deserialized = self.extension.flow_to_model(serialized)
 
             self.assertEqual(deserialized, grid)
             self.assertIsNot(deserialized, grid)
-
+            # providing error_score because nan != nan
             hpo = sklearn.model_selection.GridSearchCV(
-                param_grid=grid, estimator=model)
+                param_grid=grid, estimator=model, error_score=-1000)
 
             serialized = self.extension.model_to_flow(hpo)
             deserialized = self.extension.flow_to_model(serialized)
@@ -943,7 +946,7 @@ def test_illegal_parameter_names(self):
     def test_illegal_parameter_names_pipeline(self):
         # illegal name: steps
         steps = [
-            ('Imputer', Imputer(strategy='median')),
+            ('Imputer', SimpleImputer(strategy='median')),
             ('OneHotEncoder',
              sklearn.preprocessing.OneHotEncoder(sparse=False,
                                                  handle_unknown='ignore')),
@@ -956,7 +959,7 @@ def test_illegal_parameter_names_featureunion(self):
         # illegal name: transformer_list
         transformer_list = [
             ('transformer_list',
-             Imputer(strategy='median')),
+             SimpleImputer(strategy='median')),
             ('OneHotEncoder',
              sklearn.preprocessing.OneHotEncoder(sparse=False,
                                                  handle_unknown='ignore'))
@@ -1015,18 +1018,25 @@ def test_paralizable_check(self):
                 self.extension._prevent_optimize_n_jobs(model)
 
     def test__get_fn_arguments_with_defaults(self):
-        if LooseVersion(sklearn.__version__) < "0.19":
+        sklearn_version = LooseVersion(sklearn.__version__)
+        if sklearn_version < "0.19":
             fns = [
                 (sklearn.ensemble.RandomForestRegressor.__init__, 15),
                 (sklearn.tree.DecisionTreeClassifier.__init__, 12),
                 (sklearn.pipeline.Pipeline.__init__, 0)
             ]
-        else:
+        elif sklearn_version < "0.21":
             fns = [
                 (sklearn.ensemble.RandomForestRegressor.__init__, 16),
                 (sklearn.tree.DecisionTreeClassifier.__init__, 13),
                 (sklearn.pipeline.Pipeline.__init__, 1)
             ]
+        else:
+            fns = [
+                (sklearn.ensemble.RandomForestRegressor.__init__, 16),
+                (sklearn.tree.DecisionTreeClassifier.__init__, 13),
+                (sklearn.pipeline.Pipeline.__init__, 2)
+            ]
 
         for fn, num_params_with_defaults in fns:
             defaults, defaultless = (
@@ -1047,7 +1057,7 @@ def test_deserialize_with_defaults(self):
         # used the 'initialize_with_defaults' flag of the deserialization
         # method to return a flow that contains default hyperparameter
         # settings.
-        steps = [('Imputer', Imputer()),
+        steps = [('Imputer', SimpleImputer()),
                  ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()),
                  ('Estimator', sklearn.tree.DecisionTreeClassifier())]
         pipe_orig = sklearn.pipeline.Pipeline(steps=steps)
@@ -1071,7 +1081,7 @@ def test_deserialize_adaboost_with_defaults(self):
         # used the 'initialize_with_defaults' flag of the deserialization
         # method to return a flow that contains default hyperparameter
         # settings.
-        steps = [('Imputer', Imputer()),
+        steps = [('Imputer', SimpleImputer()),
                  ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()),
                  ('Estimator', sklearn.ensemble.AdaBoostClassifier(
                      sklearn.tree.DecisionTreeClassifier()))]
@@ -1097,7 +1107,7 @@ def test_deserialize_complex_with_defaults(self):
         # method to return a flow that contains default hyperparameter
         # settings.
         steps = [
-            ('Imputer', Imputer()),
+            ('Imputer', SimpleImputer()),
             ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()),
             (
                 'Estimator',
@@ -1237,7 +1247,7 @@ def test_run_model_on_task(self):
         class MyPipe(sklearn.pipeline.Pipeline):
             pass
         task = openml.tasks.get_task(1)
-        pipe = MyPipe([('imp', Imputer()),
+        pipe = MyPipe([('imp', SimpleImputer()),
                        ('dummy', sklearn.dummy.DummyClassifier())])
         openml.runs.run_model_on_task(pipe, task)
 
@@ -1309,7 +1319,7 @@ def test_run_model_on_fold_classification_1(self):
         y_test = y[test_indices]
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp', sklearn.preprocessing.Imputer()),
+            ('imp', SimpleImputer()),
             ('clf', sklearn.tree.DecisionTreeClassifier()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1435,11 +1445,11 @@ def predict_proba(*args, **kwargs):
             y_train = y[train_indices]
             X_test = X[test_indices]
             clf1 = sklearn.pipeline.Pipeline(steps=[
-                ('imputer', sklearn.preprocessing.Imputer()),
+                ('imputer', SimpleImputer()),
                 ('estimator', sklearn.naive_bayes.GaussianNB())
             ])
             clf2 = sklearn.pipeline.Pipeline(steps=[
-                ('imputer', sklearn.preprocessing.Imputer()),
+                ('imputer', SimpleImputer()),
                 ('estimator', HardNaiveBayes())
             ])
 
@@ -1492,7 +1502,7 @@ def test_run_model_on_fold_regression(self):
         y_test = y[test_indices]
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp', sklearn.preprocessing.Imputer()),
+            ('imp', SimpleImputer()),
             ('clf', sklearn.tree.DecisionTreeRegressor()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1537,7 +1547,7 @@ def test_run_model_on_fold_clustering(self):
         X = task.get_X(dataset_format='array')
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp', sklearn.preprocessing.Imputer()),
+            ('imp', SimpleImputer()),
             ('clf', sklearn.cluster.KMeans()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1626,7 +1636,7 @@ def test_trim_flow_name(self):
         long = """sklearn.pipeline.Pipeline(
                     columntransformer=sklearn.compose._column_transformer.ColumnTransformer(
                         numeric=sklearn.pipeline.Pipeline(
-                            imputer=sklearn.preprocessing.imputation.Imputer,
+                            SimpleImputer=sklearn.preprocessing.imputation.Imputer,
                             standardscaler=sklearn.preprocessing.data.StandardScaler),
                         nominal=sklearn.pipeline.Pipeline(
                             simpleimputer=sklearn.impute.SimpleImputer,
@@ -1650,7 +1660,7 @@ def test_trim_flow_name(self):
         self.assertEqual(short, SklearnExtension.trim_flow_name(long_stripped))
 
         long = """sklearn.pipeline.Pipeline(
-                    Imputer=sklearn.preprocessing.imputation.Imputer,
+                    SimpleImputer=sklearn.preprocessing.imputation.Imputer,
                     VarianceThreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, # noqa: E501
                     Estimator=sklearn.model_selection._search.RandomizedSearchCV(
                         estimator=sklearn.tree.tree.DecisionTreeClassifier))"""
@@ -1660,7 +1670,7 @@ def test_trim_flow_name(self):
 
         long = """sklearn.model_selection._search.RandomizedSearchCV(
                     estimator=sklearn.pipeline.Pipeline(
-                        Imputer=sklearn.preprocessing.imputation.Imputer,
+                        SimpleImputer=sklearn.preprocessing.imputation.Imputer,
                         classifier=sklearn.ensemble.forest.RandomForestClassifier))"""
         short = "sklearn.RandomizedSearchCV(Pipeline(Imputer,RandomForestClassifier))"
         long_stripped, _ = re.subn(r'\s', '', long)
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
@@ -19,18 +19,13 @@
 import sklearn.naive_bayes
 import sklearn.tree
 
-if LooseVersion(sklearn.__version__) < "0.20":
-    from sklearn.preprocessing import Imputer
-else:
-    from sklearn.impute import SimpleImputer as Imputer
-
 import xmltodict
 
 import openml
 from openml._api_calls import _perform_api_call
 import openml.exceptions
 import openml.extensions.sklearn
-from openml.testing import TestBase
+from openml.testing import TestBase, SimpleImputer
 import openml.utils
 
 
@@ -318,8 +313,8 @@ def test_illegal_flow(self):
         # should throw error as it contains two imputers
         illegal = sklearn.pipeline.Pipeline(
             steps=[
-                ('imputer1', Imputer()),
-                ('imputer2', Imputer()),
+                ('imputer1', SimpleImputer()),
+                ('imputer2', SimpleImputer()),
                 ('classif', sklearn.tree.DecisionTreeClassifier())
             ]
         )
@@ -350,7 +345,7 @@ def test_existing_flow_exists(self):
         if LooseVersion(sklearn.__version__) >= '0.20':
             ohe_params['categories'] = 'auto'
         steps = [
-            ('imputation', Imputer(strategy='median')),
+            ('imputation', SimpleImputer(strategy='median')),
             ('hotencoding', sklearn.preprocessing.OneHotEncoder(**ohe_params)),
             (
                 'variencethreshold',
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
@@ -288,7 +288,7 @@ def test_get_flow_reinstantiate_model_no_extension(self):
     def test_get_flow_reinstantiate_model_wrong_version(self):
         # Note that CI does not test against 0.19.1.
         openml.config.server = self.production_server
-        _, sklearn_major, _ = LooseVersion(sklearn.__version__).version
+        _, sklearn_major, _ = LooseVersion(sklearn.__version__).version[:3]
         flow = 8175
         expected = 'Trying to deserialize a model with dependency sklearn==0.19.1 not satisfied.'
         self.assertRaisesRegex(ValueError,
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
@@ -7,9 +7,8 @@
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import Imputer
 
-from openml.testing import TestBase
+from openml.testing import TestBase, SimpleImputer
 import openml
 import openml.extensions.sklearn
 
@@ -106,7 +105,7 @@ def _check_array(array, type_):
     def test_to_from_filesystem_vanilla(self):
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DecisionTreeClassifier(max_depth=1)),
         ])
         task = openml.tasks.get_task(119)
@@ -139,7 +138,7 @@ def test_to_from_filesystem_vanilla(self):
     def test_to_from_filesystem_search(self):
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DecisionTreeClassifier(max_depth=1)),
         ])
         model = GridSearchCV(
@@ -175,7 +174,7 @@ def test_to_from_filesystem_search(self):
     def test_to_from_filesystem_no_model(self):
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DummyClassifier()),
         ])
         task = openml.tasks.get_task(119)
@@ -205,7 +204,7 @@ def test_publish_with_local_loaded_flow(self):
         extension = openml.extensions.sklearn.SklearnExtension()
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DummyClassifier()),
         ])
         task = openml.tasks.get_task(119)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py