
Commit 4aeb927

more clean up and coverage
1 parent 42fa8e5 commit 4aeb927

10 files changed: +164 -40 lines changed


imblearn/ensemble/_bagging.py

Lines changed: 1 addition & 4 deletions

@@ -8,7 +8,6 @@
 import numbers
 
 import numpy as np
-import sklearn
 from sklearn.base import clone
 from sklearn.ensemble import BaggingClassifier
 from sklearn.ensemble._bagging import _parallel_decision_function
@@ -25,11 +24,9 @@
 from ..under_sampling.base import BaseUnderSampler
 from ..utils import Substitution, check_sampling_strategy, check_target_type
 from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
-from ..utils._sklearn_compat import _fit_context, validate_data
+from ..utils._sklearn_compat import _fit_context, sklearn_version, validate_data
 from ._common import _bagging_parameter_constraints, _estimator_has
 
-sklearn_version = parse_version(sklearn.__version__)
-
 
 @Substitution(
     sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
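
Note: this hunk, and the matching hunks in the ensemble modules below, drop the per-module `sklearn_version = parse_version(sklearn.__version__)` constants in favour of one shared constant imported from `imblearn.utils._sklearn_compat`. That compat module is not part of this diff; the following is only a sketch of the assumed shape of the shared constant, not its actual contents.

# Sketch (assumption): a centralised version constant in imblearn/utils/_sklearn_compat.py
import sklearn
from sklearn.utils.fixes import parse_version

# Parsed once here and imported everywhere a version gate is needed, instead of
# being re-derived in each module. Whether the real module strips pre-release
# suffixes via `.base_version` first is not visible in this diff.
sklearn_version = parse_version(parse_version(sklearn.__version__).base_version)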

imblearn/ensemble/_easy_ensemble.py

Lines changed: 6 additions & 3 deletions

@@ -9,7 +9,6 @@
 import warnings
 
 import numpy as np
-import sklearn
 from sklearn.base import clone
 from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
 from sklearn.ensemble._bagging import _parallel_decision_function
@@ -26,11 +25,15 @@
 from ..under_sampling.base import BaseUnderSampler
 from ..utils import Substitution, check_sampling_strategy, check_target_type
 from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
-from ..utils._sklearn_compat import _fit_context, get_tags, validate_data
+from ..utils._sklearn_compat import (
+    _fit_context,
+    get_tags,
+    sklearn_version,
+    validate_data,
+)
 from ._common import _bagging_parameter_constraints, _estimator_has
 
 MAX_INT = np.iinfo(np.int32).max
-sklearn_version = parse_version(sklearn.__version__)
 
 
 @Substitution(

imblearn/ensemble/_forest.py

Lines changed: 1 addition & 3 deletions

@@ -8,7 +8,6 @@
 from warnings import warn
 
 import numpy as np
-import sklearn
 from numpy import float32 as DTYPE
 from numpy import float64 as DOUBLE
 from scipy.sparse import issparse
@@ -33,12 +32,11 @@
 from ..under_sampling import RandomUnderSampler
 from ..utils import Substitution
 from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
-from ..utils._sklearn_compat import _fit_context, validate_data
+from ..utils._sklearn_compat import _fit_context, sklearn_version, validate_data
 from ..utils._validation import check_sampling_strategy
 from ._common import _random_forest_classifier_parameter_constraints
 
 MAX_INT = np.iinfo(np.int32).max
-sklearn_version = parse_version(parse_version(sklearn.__version__).base_version)
 
 
 def _local_parallel_build_trees(
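
One detail worth noting: the constant removed here parsed `base_version`, while the ones removed in `_bagging.py` above and `_weight_boosting.py` below parsed the full version string. The two disagree on pre-releases, which is exactly the kind of drift a single shared constant removes. A small illustration (hypothetical version strings):

from sklearn.utils.fixes import parse_version

dev = parse_version("1.4.dev0")
# The full version sorts *before* the final release...
print(dev < parse_version("1.4"))                               # True
# ...but its base_version ("1.4") does not.
print(parse_version(dev.base_version) < parse_version("1.4"))   # False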

imblearn/ensemble/_weight_boosting.py

Lines changed: 1 addition & 4 deletions

@@ -4,7 +4,6 @@
 from copy import deepcopy
 
 import numpy as np
-import sklearn
 from sklearn.base import clone
 from sklearn.ensemble import AdaBoostClassifier
 from sklearn.ensemble._base import _set_random_states
@@ -19,11 +18,9 @@
 from ..under_sampling.base import BaseUnderSampler
 from ..utils import Substitution, check_target_type
 from ..utils._docstring import _random_state_docstring
-from ..utils._sklearn_compat import _fit_context
+from ..utils._sklearn_compat import _fit_context, sklearn_version
 from ._common import _adaboost_classifier_parameter_constraints
 
-sklearn_version = parse_version(sklearn.__version__)
-
 
 @Substitution(
     sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,

imblearn/ensemble/tests/test_forest.py

Lines changed: 1 addition & 3 deletions

@@ -1,14 +1,12 @@
 import numpy as np
 import pytest
-import sklearn
 from sklearn.datasets import make_classification
 from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.utils._testing import assert_allclose, assert_array_equal
 from sklearn.utils.fixes import parse_version
 
 from imblearn.ensemble import BalancedRandomForestClassifier
-
-sklearn_version = parse_version(sklearn.__version__)
+from imblearn.utils._sklearn_compat import sklearn_version
 
 
 @pytest.fixture

imblearn/ensemble/tests/test_weight_boosting.py

Lines changed: 10 additions & 0 deletions

@@ -82,3 +82,13 @@ def test_rusboost_sample_weight(imbalanced_dataset):
 
     with pytest.raises(AssertionError):
         assert_array_equal(y_pred_no_sample_weight, y_pred_sample_weight)
+
+
+@pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"])
+def test_rusboost_algorithm(imbalanced_dataset, algorithm):
+    X, y = imbalanced_dataset
+
+    rusboost = RUSBoostClassifier(algorithm=algorithm)
+    warn_msg = "`algorithm` parameter is deprecated in 0.12 and will be removed"
+    with pytest.warns(FutureWarning, match=warn_msg):
+        rusboost.fit(X, y)
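
The new test covers the deprecation path of the `algorithm` parameter. A minimal usage sketch of what that looks like from the user side (the dataset is illustrative; the warning text is taken from the test above and may change in later releases):

import warnings

from sklearn.datasets import make_classification

from imblearn.ensemble import RUSBoostClassifier

X, y = make_classification(n_samples=100, weights=[0.9, 0.1], random_state=0)

with warnings.catch_warnings(record=True) as records:
    warnings.simplefilter("always")
    RUSBoostClassifier(algorithm="SAMME", random_state=0).fit(X, y)

# Expect a FutureWarning announcing that `algorithm` is deprecated.
print([str(r.message) for r in records if r.category is FutureWarning])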

imblearn/over_sampling/_smote/tests/test_smote_nc.py

Lines changed: 0 additions & 16 deletions

@@ -15,11 +15,6 @@
 
 from imblearn.over_sampling import SMOTENC
 
-# from imblearn.utils.estimator_checks import (
-#     _set_checking_parameters,
-#     check_param_validation,
-# )
-
 
 def data_heterogneous_ordered():
     rng = np.random.RandomState(42)
@@ -293,17 +288,6 @@ def test_smotenc_deprecation_ohe_():
         smote.ohe_
 
 
-# """ def test_smotenc_param_validation():
-#     """Check that we validate the parameters correctly since this estimator requires
-#     a specific parameter.
-#     """
-#     categorical_features = [0]
-#     smote = SMOTENC(categorical_features=categorical_features, random_state=0)
-#     name = smote.__class__.__name__
-#     _set_checking_parameters(smote)
-#     check_param_validation(name, smote) """
-
-
 def test_smotenc_bool_categorical():
     """Check that we don't try to early convert the full input data to numeric when
     handling a pandas dataframe.

imblearn/pipeline.py

Lines changed: 11 additions & 2 deletions

@@ -29,6 +29,7 @@
     get_routing_for_object,
 )
 from sklearn.utils._param_validation import HasMethods
+from sklearn.utils.fixes import parse_version
 from sklearn.utils.metaestimators import available_if
 from sklearn.utils.validation import check_is_fitted, check_memory
 
@@ -38,6 +39,7 @@
     _raise_for_params,
     get_tags,
     process_routing,
+    sklearn_version,
     validate_params,
 )
 
@@ -55,7 +57,7 @@ def _raise_or_warn_if_not_fitted(estimator):
     """A context manager to make sure a NotFittedError is raised, if a sub-estimator
     raises the error.
     Otherwise, we raise a warning if the pipeline is not fitted, with the deprecation.
-    TODO(1.8): remove this context manager and replace with check_is_fitted.
+    TODO(0.15): remove this context manager and replace with check_is_fitted.
     """
     try:
         yield
@@ -70,7 +72,7 @@ def _raise_or_warn_if_not_fitted(estimator):
             (
                 "This Pipeline instance is not fitted yet. Call 'fit' with "
                 "appropriate arguments before using other methods such as transform, "
-                "predict, etc. This will raise an error in 1.8 instead of the current "
+                "predict, etc. This will raise an error in 0.15 instead of the current "
                 "warning."
             ),
             FutureWarning,
@@ -511,6 +513,13 @@ def fit(self, X, y=None, **params):
                 "`sklearn.set_config(enable_metadata_routing=True)`."
             )
 
+        if sklearn_version < parse_version("1.4") and self.transform_input is not None:
+            raise ValueError(
+                "The `transform_input` parameter is not supported in scikit-learn "
+                "versions prior to 1.4. Please upgrade to scikit-learn 1.4 or "
+                "later."
+            )
+
         routed_params = self._check_method_params(method="fit", props=params)
         Xt, yt = self._fit(X, y, routed_params, raw_params=params)
         with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
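
The only behavioural change in `Pipeline.fit` is the new guard: when `transform_input` is set and the installed scikit-learn predates 1.4, fitting now fails fast with an explicit error instead of breaking later. A hedged usage sketch of that error path (the estimator choice is arbitrary; it assumes a scikit-learn version that already exposes the `enable_metadata_routing` config flag, and on scikit-learn >= 1.4 the branch below is simply skipped):

from sklearn import config_context
from sklearn.preprocessing import StandardScaler
from sklearn.utils.fixes import parse_version

from imblearn.pipeline import make_pipeline
from imblearn.utils._sklearn_compat import sklearn_version

X, y = [[0.0], [1.0], [2.0], [3.0]], [0, 0, 1, 1]

if sklearn_version < parse_version("1.4"):
    # Routing must be enabled, otherwise the pre-existing metadata check fires first.
    with config_context(enable_metadata_routing=True):
        try:
            make_pipeline(StandardScaler(), transform_input=["X_val"]).fit(X, y)
        except ValueError as exc:
            print(exc)  # "The `transform_input` parameter is not supported in scikit-learn ..."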

imblearn/tests/test_pipeline.py

Lines changed: 131 additions & 1 deletion

@@ -1,6 +1,7 @@
 """
 Test the pipeline module.
 """
+
 # Authors: Guillaume Lemaitre <[email protected]>
 #          Christos Aridas
 # License: MIT
@@ -15,7 +16,8 @@
 import pytest
 from joblib import Memory
 from pytest import raises
-from sklearn.base import BaseEstimator, clone
+from sklearn import config_context
+from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin, clone
 from sklearn.cluster import KMeans
 from sklearn.datasets import load_iris, make_classification
 from sklearn.decomposition import PCA
@@ -30,11 +32,13 @@
     assert_array_almost_equal,
     assert_array_equal,
 )
+from sklearn.utils.fixes import parse_version
 
 from imblearn.datasets import make_imbalance
 from imblearn.pipeline import Pipeline, make_pipeline
 from imblearn.under_sampling import EditedNearestNeighbours as ENN
 from imblearn.under_sampling import RandomUnderSampler
+from imblearn.utils._sklearn_compat import sklearn_version
 from imblearn.utils.estimator_checks import check_param_validation
 
 JUNK_FOOD_DOCS = (
@@ -1365,3 +1369,129 @@ def test_pipeline_with_set_output():
     assert isinstance(X_res, pd.DataFrame)
     # transformer will not change `y` and sampler will always preserve the type of `y`
     assert isinstance(y_res, type(y))
+
+
+# TODO(0.15): change warning to checking for NotFittedError
+@pytest.mark.parametrize(
+    "method",
+    [
+        "predict",
+        "predict_proba",
+        "predict_log_proba",
+        "decision_function",
+        "score",
+        "score_samples",
+        "transform",
+        "inverse_transform",
+    ],
+)
+def test_pipeline_warns_not_fitted(method):
+    class StatelessEstimator(BaseEstimator):
+        """Stateless estimator that doesn't check if it's fitted.
+        Stateless estimators that don't require fit, should properly set the
+        `requires_fit` flag and implement a `__sklearn_check_is_fitted__` returning
+        `True`.
+        """
+
+        def fit(self, X, y):
+            return self  # pragma: no cover
+
+        def transform(self, X):
+            return X
+
+        def predict(self, X):
+            return np.ones(len(X))
+
+        def predict_proba(self, X):
+            return np.ones(len(X))
+
+        def predict_log_proba(self, X):
+            return np.zeros(len(X))
+
+        def decision_function(self, X):
+            return np.ones(len(X))
+
+        def score(self, X, y):
+            return 1
+
+        def score_samples(self, X):
+            return np.ones(len(X))
+
+        def inverse_transform(self, X):
+            return X
+
+    pipe = Pipeline([("estimator", StatelessEstimator())])
+    with pytest.warns(FutureWarning, match="This Pipeline instance is not fitted yet."):
+        getattr(pipe, method)([[1]])
+
+
+# transform_input tests
+# =====================
+
+
+@pytest.mark.skipif(
+    sklearn_version < parse_version("1.4"),
+    reason="scikit-learn < 1.4 does not support transform_input",
+)
+@config_context(enable_metadata_routing=True)
+def test_transform_input_explicit_value_check():
+    """Test that the right transformed values are passed to `fit`."""
+
+    class Transformer(TransformerMixin, BaseEstimator):
+        def fit(self, X, y):
+            self.fitted_ = True
+            return self
+
+        def transform(self, X):
+            return X + 1
+
+    class Estimator(ClassifierMixin, BaseEstimator):
+        def fit(self, X, y, X_val=None, y_val=None):
+            assert_array_equal(X, np.array([[1, 2]]))
+            assert_array_equal(y, np.array([0, 1]))
+            assert_array_equal(X_val, np.array([[2, 3]]))
+            assert_array_equal(y_val, np.array([0, 1]))
+            return self
+
+    X = np.array([[0, 1]])
+    y = np.array([0, 1])
+    X_val = np.array([[1, 2]])
+    y_val = np.array([0, 1])
+    pipe = Pipeline(
+        [
+            ("transformer", Transformer()),
+            ("estimator", Estimator().set_fit_request(X_val=True, y_val=True)),
+        ],
+        transform_input=["X_val"],
+    )
+    pipe.fit(X, y, X_val=X_val, y_val=y_val)
+
+
+def test_transform_input_no_slep6():
+    """Make sure the right error is raised if slep6 is not enabled."""
+    X = np.array([[1, 2], [3, 4]])
+    y = np.array([0, 1])
+    msg = "The `transform_input` parameter can only be set if metadata"
+    with pytest.raises(ValueError, match=msg):
+        make_pipeline(DummyTransf(), transform_input=["blah"]).fit(X, y)
+
+
+@pytest.mark.skipif(
+    sklearn_version >= parse_version("1.4"),
+    reason="scikit-learn >= 1.4 supports transform_input",
+)
+@config_context(enable_metadata_routing=True)
+def test_transform_input_sklearn_version():
+    """Test that transform_input raises error with sklearn < 1.4."""
+    X = np.array([[1, 2], [3, 4]])
+    y = np.array([0, 1])
+    msg = (
+        "The `transform_input` parameter is not supported in scikit-learn versions "
+        "prior to 1.4"
+    )
+    with pytest.raises(ValueError, match=msg):
+        make_pipeline(DummyTransf(), transform_input=["blah"]).fit(X, y)
+
+
+# end of transform_input tests
+# =============================
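
The docstring of `StatelessEstimator` above mentions the protocol that genuinely stateless estimators should follow. A small illustrative sketch of that protocol, separate from the tests (the class name is made up; `check_is_fitted` defers to `__sklearn_check_is_fitted__` when an estimator defines it):

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted


class IdentityTransformer(TransformerMixin, BaseEstimator):
    """Stateless transformer: `fit` learns nothing."""

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return np.asarray(X)

    def __sklearn_check_is_fitted__(self):
        # There is no fitted state, so always report "fitted".
        return True


check_is_fitted(IdentityTransformer())  # does not raise
print(IdentityTransformer().transform([[1.0, 2.0]]))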

imblearn/utils/_test_common/instance_generator.py

Lines changed: 2 additions & 4 deletions

@@ -8,7 +8,6 @@
 from functools import partial
 from inspect import isfunction
 
-import sklearn
 from sklearn import clone, config_context
 from sklearn.exceptions import SkipTestWarning
 from sklearn.linear_model import LogisticRegression
@@ -42,10 +41,9 @@
     OneSidedSelection,
     RandomUnderSampler,
 )
+from imblearn.utils._sklearn_compat import sklearn_version
 from imblearn.utils.testing import all_estimators
 
-sklearn_version = parse_version(sklearn.__version__).base_version
-
 # The following dictionary is to indicate constructor arguments suitable for the test
 # suite, which uses very small datasets, and is intended to run rather quickly.
 INIT_PARAMS = {
@@ -232,7 +230,7 @@ def _yield_instances_for_check(check, estimator_orig):
     },
 }
 
-if sklearn_version < "1.4":
+if sklearn_version < parse_version("1.4"):
     for _, Estimator in all_estimators():
         if Estimator in PER_ESTIMATOR_XFAIL_CHECKS:
             PER_ESTIMATOR_XFAIL_CHECKS[Estimator]["check_estimators_pickle"] = "FIXME"
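
The last hunk fixes a subtle bug rather than mere cleanup: the removed constant was a *string* (`.base_version`), so `sklearn_version < "1.4"` compared lexicographically, which misorders versions once a component reaches two digits. An illustration with hypothetical version numbers:

from sklearn.utils.fixes import parse_version

# Lexicographic string comparison: "1.10..." sorts before "1.4" because "1" < "4".
print("1.10.0" < "1.4")                                   # True (wrong ordering)

# Parsed version comparison handles multi-digit components correctly.
print(parse_version("1.10.0") < parse_version("1.4"))     # False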
