MNT scikit-learn 0.23 compatibility (#65)

rth · web-flow · commit 10103f8932de · 2020-06-17T22:15:38.000+02:00
diff --git a/README.rst b/README.rst
@@ -33,7 +33,7 @@ Dependencies
 scikit-learn-extra requires,
  
 - Python (>=3.6)
-- scikit-learn (>=0.21), and its dependencies
+- scikit-learn (>=0.22), and its dependencies
 
 
 User installation
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -9,7 +9,7 @@ jobs:
         python.version: '3.6'
         NUMPY_VERSION: "1.13.3"
         SCIPY_VERSION: "0.19.1"
-        SKLEARN_VERSION: "0.21.2"
+        SKLEARN_VERSION: "0.22.2post1"
       Python37:
         python.version: '3.7'
         NUMPY_VERSION: "1.16.5"
@@ -63,7 +63,7 @@ jobs:
         python.version: '3.6'
         NUMPY_VERSION: "1.13.3"
         SCIPY_VERSION: "0.19.1"
-        SKLEARN_VERSION: "0.21.2"
+        SKLEARN_VERSION: "0.22.2post1"
       Python37:
         python.version: '3.7'
         NUMPY_VERSION: "1.16.5"
@@ -115,7 +115,7 @@ jobs:
         python.version: '3.6'
         NUMPY_VERSION: "1.13.3"
         SCIPY_VERSION: "1.0.1"
-        SKLEARN_VERSION: "0.21.2"
+        SKLEARN_VERSION: "0.22.2post1"
       Python38:
         python_ver: '38'
         python.version: '3.8'
diff --git a/conftest.py b/conftest.py
@@ -0,0 +1,39 @@
+import sys
+from distutils.version import LooseVersion
+import sklearn
+
+import pytest
+from _pytest.doctest import DoctestItem
+
+
+def pytest_collection_modifyitems(config, items):
+    """Skip collected doctests on old numpy/scikit-learn and on Windows."""
+    # numpy changed the str/repr formatting of numpy arrays in 1.14. Doctests
+    # are only run for numpy >= 1.14 and scikit-learn >= 0.23.0.
+    skip_doctests = False
+    try:
+        import numpy as np
+
+        if LooseVersion(np.__version__) < LooseVersion("1.14") or LooseVersion(
+            sklearn.__version__
+        ) < LooseVersion("0.23.0"):
+            reason = (
+                "doctests are only run for numpy >= 1.14 "
+                "and scikit-learn >=0.23.0"
+            )
+            skip_doctests = True
+        elif sys.platform.startswith("win32"):
+            reason = (
+                "doctests are not run for Windows because numpy arrays "
+                "repr is inconsistent across platforms."
+            )
+            skip_doctests = True
+    except ImportError:  # numpy not importable: doctests are left enabled
+        pass
+
+    if skip_doctests:
+        skip_marker = pytest.mark.skip(reason=reason)
+
+        for item in items:
+            if isinstance(item, DoctestItem):
+                item.add_marker(skip_marker)
diff --git a/doc/install.rst b/doc/install.rst
@@ -7,7 +7,7 @@ Dependencies
 scikit-learn-extra requires,
  
 - Python (>=3.6)
-- scikit-learn (>=0.21), and its dependencies
+- scikit-learn (>=0.22), and its dependencies
 
 
 User installation
diff --git a/setup.py b/setup.py
@@ -26,7 +26,7 @@
 LICENSE = "new BSD"
 DOWNLOAD_URL = "https://github.com/scikit-learn-contrib/scikit-learn-extra"
 VERSION = __version__  # noqa
-INSTALL_REQUIRES = ["numpy>=1.13.3", "scipy>=0.19.1", "scikit-learn>=0.21.0"]
+INSTALL_REQUIRES = ["numpy>=1.13.3", "scipy>=0.19.1", "scikit-learn>=0.22.0"]
 CLASSIFIERS = [
     "Intended Audience :: Science/Research",
     "Intended Audience :: Developers",
diff --git a/sklearn_extra/cluster/tests/test_k_medoids.py b/sklearn_extra/cluster/tests/test_k_medoids.py
@@ -3,13 +3,12 @@
 import numpy as np
 from unittest import mock
 from scipy.sparse import csc_matrix
+import pytest
 
 from sklearn.datasets import load_iris
 from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
 from sklearn.metrics.pairwise import euclidean_distances
-from sklearn.utils.testing import assert_array_equal, assert_equal
-from sklearn.utils.testing import assert_raise_message, assert_warns_message
-from sklearn.utils.testing import assert_allclose
+from numpy.testing import assert_allclose, assert_array_equal
 
 from sklearn_extra.cluster import KMedoids
 from sklearn.cluster import KMeans
@@ -21,51 +20,37 @@
 def test_kmedoids_input_validation_and_fit_check():
     rng = np.random.RandomState(seed)
     # Invalid parameters
-    assert_raise_message(
-        ValueError,
-        "n_clusters should be a nonnegative " "integer. 0 was given",
-        KMedoids(n_clusters=0).fit,
-        X,
-    )
+    msg = "n_clusters should be a nonnegative integer. 0 was given"
+    with pytest.raises(ValueError, match=msg):
+        KMedoids(n_clusters=0).fit(X)
 
-    assert_raise_message(
-        ValueError,
-        "n_clusters should be a nonnegative " "integer. None was given",
-        KMedoids(n_clusters=None).fit,
-        X,
-    )
+    msg = "n_clusters should be a nonnegative integer. None was given"
+    with pytest.raises(ValueError, match=msg):
+        KMedoids(n_clusters=None).fit(X)
 
-    assert_raise_message(
-        ValueError,
-        "max_iter should be a nonnegative " "integer. 0 was given",
-        KMedoids(n_clusters=1, max_iter=0).fit,
-        X,
-    )
+    msg = "max_iter should be a nonnegative integer. 0 was given"
+    with pytest.raises(ValueError, match=msg):
+        KMedoids(n_clusters=1, max_iter=0).fit(X)
 
-    assert_raise_message(
-        ValueError,
-        "max_iter should be a nonnegative " "integer. None was given",
-        KMedoids(n_clusters=1, max_iter=None).fit,
-        X,
-    )
+    msg = "max_iter should be a nonnegative integer. None was given"
+    with pytest.raises(ValueError, match=msg):
+        KMedoids(n_clusters=1, max_iter=None).fit(X)
 
-    assert_raise_message(
-        ValueError,
-        "init needs to be one of the following: "
-        "['random', 'heuristic', 'k-medoids++']",
-        KMedoids(init=None).fit,
-        X,
+    msg = (
+        r"init needs to be one of the following: "
+        r".*random.*heuristic.*k-medoids\+\+"
     )
+    with pytest.raises(ValueError, match=msg):
+        KMedoids(init=None).fit(X)
 
     # Trying to fit 3 samples to 8 clusters
-    Xsmall = rng.rand(5, 2)
-    assert_raise_message(
-        ValueError,
-        "The number of medoids (8) must be less "
-        "than the number of samples 5.",
-        KMedoids(n_clusters=8).fit,
-        Xsmall,
+    msg = (  # raw strings: "\(" is a regex escape, not a string escape
+        r"The number of medoids \(8\) must be less "
+        r"than the number of samples 5."
     )
+    Xsmall = rng.rand(5, 2)
+    with pytest.raises(ValueError, match=msg):
+        KMedoids(n_clusters=8).fit(Xsmall)
 
 
 def test_random_deterministic():
@@ -113,7 +98,8 @@ def test_kmedoids_empty_clusters():
     rng = np.random.RandomState(seed)
     X = [[1], [1], [1]]
     kmedoids = KMedoids(n_clusters=2, random_state=rng)
-    assert_warns_message(UserWarning, "Cluster 1 is empty!", kmedoids.fit, X)
+    with pytest.warns(UserWarning, match="Cluster 1 is empty!"):
+        kmedoids.fit(X)
 
 
 @mock.patch.object(KMedoids, "_kpp_init", return_value=object())
@@ -212,12 +198,10 @@ def test_max_iter():
     model = KMedoids(
         n_clusters=10, init="random", random_state=rng, max_iter=1
     )
-    assert_warns_message(
-        UserWarning,
-        "Maximum number of iteration reached before",
-        model.fit,
-        X_iris,
-    )
+    msg = "Maximum number of iteration reached before"
+
+    with pytest.warns(UserWarning, match=msg):
+        model.fit(X_iris)
 
 
 def test_kmedoids_iris():
@@ -261,7 +245,7 @@ def test_kmedoids_fit_predict_transform():
     model = KMedoids(random_state=rng)
 
     labels1 = model.fit_predict(X)
-    assert_equal(len(labels1), 100)
+    assert len(labels1) == 100
     assert_array_equal(labels1, model.labels_)
 
     labels2 = model.predict(X)
@@ -282,7 +266,7 @@ def my_metric(a, b):
 
     model = KMedoids(random_state=rng, metric=my_metric)
     labels1 = model.fit_predict(X)
-    assert_equal(len(labels1), 100)
+    assert len(labels1) == 100
     assert_array_equal(labels1, model.labels_)
 
 
@@ -308,5 +292,5 @@ def test_kmedoids_on_sparse_input():
     data = np.array([1, 1])
     X = csc_matrix((data, (row, col)), shape=(2, 5))
     labels = model.fit_predict(X)
-    assert_equal(len(labels), 2)
+    assert len(labels) == 2
     assert_array_equal(labels, model.labels_)
diff --git a/sklearn_extra/kernel_approximation/_fastfood.py b/sklearn_extra/kernel_approximation/_fastfood.py
@@ -1,5 +1,6 @@
 # License: BSD 3 clause
 
+from math import sqrt
 import numpy as np
 from scipy.stats import chi
 
@@ -56,7 +57,7 @@ class Fastfood(BaseEstimator, TransformerMixin):
 
     def __init__(
         self,
-        sigma=np.sqrt(1 / 2),
+        sigma=sqrt(1 / 2),
         n_components=100,
         tradeoff_mem_accuracy="accuracy",
         random_state=None,
diff --git a/sklearn_extra/kernel_approximation/test_fastfood.py b/sklearn_extra/kernel_approximation/test_fastfood.py
@@ -1,8 +1,7 @@
 import pytest
 import numpy as np
 
-from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_array_almost_equal
+from numpy.testing import assert_array_almost_equal
 from sklearn.metrics.pairwise import rbf_kernel
 
 from sklearn_extra.kernel_approximation import Fastfood
@@ -28,7 +27,7 @@
 def test_fastfood_enforce_dimensionality_constraint(message, input_, expected):
     d, n = input_
     output = Fastfood._enforce_dimensionality_constraints(d, n)
-    assert_equal(expected, output, message)
+    assert expected == output, message
 
 
 def test_fastfood():
diff --git a/sklearn_extra/kernel_methods/_eigenpro.py b/sklearn_extra/kernel_methods/_eigenpro.py
@@ -394,10 +394,12 @@ def _raw_predict(self, X):
         return Y
 
     def _get_tags(self):
-        return {"multioutput": True}
+        tags = super()._get_tags()
+        tags["multioutput"] = True
+        return tags
 
 
-class EigenProRegressor(BaseEigenPro, RegressorMixin):
+class EigenProRegressor(RegressorMixin, BaseEigenPro):
     """Regression using EigenPro iteration.
 
     Train least squared kernel regression model with mini-batch EigenPro
@@ -470,9 +472,7 @@ class EigenProRegressor(BaseEigenPro, RegressorMixin):
     >>> y_train = rng.randn(n_samples, n_targets)
     >>> rgs = EigenProRegressor(n_epoch=3, gamma=.5, subsample_size=50)
     >>> rgs.fit(x_train, y_train)
-    EigenProRegressor(batch_size='auto', coef0=1, degree=3, gamma=0.5, kernel='rbf',
-                      kernel_params=None, n_components=1000, n_epoch=3,
-                      random_state=None, subsample_size=50)
+    EigenProRegressor(gamma=0.5, n_epoch=3, subsample_size=50)
     >>> y_pred = rgs.predict(x_train)
     >>> loss = np.mean(np.square(y_train - y_pred))
     """
@@ -510,7 +510,7 @@ def predict(self, X):
         return self._raw_predict(X)
 
 
-class EigenProClassifier(BaseEigenPro, ClassifierMixin):
+class EigenProClassifier(ClassifierMixin, BaseEigenPro):
     """Classification using EigenPro iteration.
 
     Train least squared kernel classification model with mini-batch EigenPro
@@ -584,9 +584,7 @@ class EigenProClassifier(BaseEigenPro, ClassifierMixin):
     >>> y_train = rng.randint(n_targets, size=n_samples)
     >>> rgs = EigenProClassifier(n_epoch=3, gamma=.01, subsample_size=50)
     >>> rgs.fit(x_train, y_train)
-    EigenProClassifier(batch_size='auto', coef0=1, degree=3, gamma=0.01,
-                       kernel='rbf', kernel_params=None, n_components=1000,
-                       n_epoch=3, random_state=None, subsample_size=50)
+    EigenProClassifier(gamma=0.01, n_epoch=3, subsample_size=50)
     >>> y_pred = rgs.predict(x_train)
     >>> loss = np.mean(y_train != y_pred)
     """
diff --git a/sklearn_extra/kernel_methods/tests/test_eigenpro.py b/sklearn_extra/kernel_methods/tests/test_eigenpro.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 from sklearn.datasets import make_regression, make_classification
-from sklearn.utils.testing import assert_allclose
+from numpy.testing import assert_allclose
 from sklearn_extra.kernel_methods import EigenProRegressor, EigenProClassifier
 
 import pytest
diff --git a/sklearn_extra/tests/test_common.py b/sklearn_extra/tests/test_common.py
@@ -7,17 +7,15 @@
 
 ALL_ESTIMATORS = [Fastfood, KMedoids, EigenProClassifier, EigenProRegressor]
 
-if hasattr(estimator_checks, "parametrize_with_checks"):
-    # Common tests are only run on scikit-learn 0.22+
 
-    @estimator_checks.parametrize_with_checks(ALL_ESTIMATORS)
-    def test_all_estimators(estimator, check, request):
-        # TODO: fix this common test failure cf #41
-        if isinstance(
-            estimator, EigenProClassifier
-        ) and "function check_classifier_multioutput" in str(check):
-            request.applymarker(
-                pytest.mark.xfail(run=False, reason="See issue #41")
-            )
+@estimator_checks.parametrize_with_checks([cls() for cls in ALL_ESTIMATORS])
+def test_all_estimators(estimator, check, request):
+    # TODO: fix this common test failure cf #41
+    if isinstance(
+        estimator, EigenProClassifier
+    ) and "function check_classifier_multioutput" in str(check):
+        request.applymarker(
+            pytest.mark.xfail(run=False, reason="See issue #41")
+        )
 
-        return check(estimator)
+    return check(estimator)
diff --git a/sklearn_extra/utils/tests/test_fht.py b/sklearn_extra/utils/tests/test_fht.py
@@ -1,8 +1,7 @@
 import numpy as np
 import numpy.testing as npt
 from scipy.linalg import hadamard
-
-from sklearn.utils.testing import assert_raises
+import pytest
 
 from sklearn_extra.utils._cyfht import fht as cyfht
 from sklearn_extra.utils._cyfht import fht2 as cyfht2
@@ -36,5 +35,9 @@ def test_numerical_fuzzing_fht2():
 
 
 def test_exception_when_input_not_power_two():
-    assert_raises(ValueError, cyfht, np.zeros(9, dtype=np.float64))
-    assert_raises(ValueError, cyfht2, np.zeros((2, 9), dtype=np.float64))
+    msg = "Length of input for fht must be a power of two"
+    with pytest.raises(ValueError, match=msg):
+        cyfht(np.zeros(9, dtype=np.float64))
+    msg = "Length of rows for fht2 must be a power of two"
+    with pytest.raises(ValueError, match=msg):
+        cyfht2(np.zeros((2, 9), dtype=np.float64))