From ed7e6fc7569570cfbd2ce0f923b9c55209836b3d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sun, 10 Nov 2024 23:51:54 +0100 Subject: [PATCH 01/20] iter --- imblearn/base.py | 22 +- imblearn/ensemble/_bagging.py | 15 +- imblearn/ensemble/_easy_ensemble.py | 20 +- imblearn/ensemble/_forest.py | 22 +- imblearn/metrics/pairwise.py | 7 +- .../over_sampling/_random_over_sampler.py | 5 +- imblearn/over_sampling/_smote/base.py | 13 +- imblearn/tests/test_common.py | 24 +- .../_random_under_sampler.py | 5 +- imblearn/utils/_tags.py | 13 + imblearn/utils/_test_common/__init__.py | 0 .../utils/_test_common/instance_generator.py | 138 ++++++++ imblearn/utils/_validation.py | 11 +- imblearn/utils/estimator_checks.py | 305 +++++++++++++++--- imblearn/utils/fixes.py | 38 ++- imblearn/utils/tests/test_estimator_checks.py | 12 +- 16 files changed, 543 insertions(+), 107 deletions(-) create mode 100644 imblearn/utils/_tags.py create mode 100644 imblearn/utils/_test_common/__init__.py create mode 100644 imblearn/utils/_test_common/instance_generator.py diff --git a/imblearn/base.py b/imblearn/base.py index 6e3954532..18913667d 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -7,15 +7,26 @@ from abc import ABCMeta, abstractmethod import numpy as np +import sklearn from sklearn.base import BaseEstimator, OneToOneFeatureMixin from sklearn.preprocessing import label_binarize +from sklearn.utils.metaestimators import available_if from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.fixes import parse_version from .utils import check_sampling_strategy, check_target_type +from .utils.fixes import validate_data from .utils._param_validation import validate_parameter_constraints +from .utils._tags import InputTags from .utils._validation import ArraysTransformer +def check_version(): + return parse_version( + parse_version(sklearn.__version__).base_version + ) >= parse_version("1.6") + + class _ParamsValidationMixin: """Mixin class to validate parameters.""" @@ -147,7 +158,7 @@ def _check_X_y(self, X, y, accept_sparse=None): if accept_sparse is None: accept_sparse = ["csr", "csc"] y, binarize_y = check_target_type(y, indicate_one_vs_all=True) - X, y = self._validate_data(X, y, reset=True, accept_sparse=accept_sparse) + X, y = validate_data(self, X=X, y=y, reset=True, accept_sparse=accept_sparse) return X, y, binarize_y def fit(self, X, y): @@ -196,9 +207,18 @@ def fit_resample(self, X, y): self._validate_params() return super().fit_resample(X, y) + @available_if(check_version) def _more_tags(self): return {"X_types": ["2darray", "sparse", "dataframe"]} + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags = InputTags() + tags.input_tags.two_d_array = True + tags.input_tags.sparse = True + tags.input_tags.dataframe = True + return tags + def _identity(X, y): return X, y diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py index 79559cd2c..0ce2526ae 100644 --- a/imblearn/ensemble/_bagging.py +++ b/imblearn/ensemble/_bagging.py @@ -26,10 +26,10 @@ from ..utils import Substitution, check_sampling_strategy, check_target_type from ..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import HasMethods, Interval, StrOptions -from ..utils.fixes import _fit_context +from ..utils.fixes import _fit_context, validate_data from ._common import _bagging_parameter_constraints, _estimator_has -sklearn_version = parse_version(sklearn.__version__) +sklearn_version = 
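
For context on the `available_if` gating used in `imblearn/base.py` above: scikit-learn calls the predicate with the estimator instance, and a falsy result hides the decorated attribute. A minimal sketch of the pattern (the `Demo` class and `has_legacy_tags` predicate are illustrative, not part of the patch):

    from sklearn.utils.metaestimators import available_if

    def has_legacy_tags(estimator):
        # available_if passes the instance to the predicate; returning False
        # makes the decorated attribute raise AttributeError on access.
        return False  # e.g. the result of a sklearn < 1.6 version check

    class Demo:
        @available_if(has_legacy_tags)
        def _more_tags(self):
            return {"X_types": ["2darray"]}

    print(hasattr(Demo(), "_more_tags"))  # False while the predicate fails
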
parse_version(parse_version(sklearn.__version__).base_version) @Substitution( @@ -382,12 +382,17 @@ def decision_function(self, X): check_is_fitted(self) # Check data - X = self._validate_data( - X, + if sklearn_version < parse_version("1.6"): + kwargs = {"force_all_finite": False} + else: + kwargs = {"ensure_all_finite": False} + X = validate_data( + self, + X=X, accept_sparse=["csr", "csc"], dtype=None, - force_all_finite=False, reset=False, + **kwargs ) # Parallel loop diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index aec7f6837..78b1e842a 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -14,7 +14,6 @@ from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier from sklearn.ensemble._bagging import _parallel_decision_function from sklearn.ensemble._base import _partition_estimators -from sklearn.utils._tags import _safe_tags from sklearn.utils.fixes import parse_version from sklearn.utils.metaestimators import available_if from sklearn.utils.parallel import Parallel, delayed @@ -27,11 +26,11 @@ from ..utils import Substitution, check_sampling_strategy, check_target_type from ..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import Interval, StrOptions -from ..utils.fixes import _fit_context +from ..utils.fixes import _fit_context, get_tags, validate_data from ._common import _bagging_parameter_constraints, _estimator_has MAX_INT = np.iinfo(np.int32).max -sklearn_version = parse_version(sklearn.__version__) +sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) @Substitution( @@ -311,12 +310,17 @@ def decision_function(self, X): check_is_fitted(self) # Check data - X = self._validate_data( - X, + if sklearn_version < parse_version("1.6"): + kwargs = {"force_all_finite": False} + else: + kwargs = {"ensure_all_finite": False} + X = validate_data( + self, + X=X, accept_sparse=["csr", "csc"], dtype=None, - force_all_finite=False, reset=False, + **kwargs, ) # Parallel loop @@ -351,4 +355,6 @@ def _get_estimator(self): # TODO: remove when minimum supported version of scikit-learn is 1.5 def _more_tags(self): - return {"allow_nan": _safe_tags(self._get_estimator(), "allow_nan")} + # This code should not be called for scikit-learn >= 1.6 + # Therefore, get_tags corresponds to _safe_tags that returns a dict + return {"allow_nan": get_tags(self._get_estimator(), "allow_nan")} diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py index 587db01d8..5f1b700bc 100644 --- a/imblearn/ensemble/_forest.py +++ b/imblearn/ensemble/_forest.py @@ -35,11 +35,11 @@ from ..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import Hidden, Interval, StrOptions from ..utils._validation import check_sampling_strategy -from ..utils.fixes import _fit_context +from ..utils.fixes import _fit_context, validate_data from ._common import _random_forest_classifier_parameter_constraints MAX_INT = np.iinfo(np.int32).max -sklearn_version = parse_version(sklearn.__version__) +sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) def _local_parallel_build_trees( @@ -597,21 +597,25 @@ def fit(self, X, y, sample_weight=None): # TODO: remove when the minimum supported version of scipy will be 1.4 # Support for missing values if parse_version(sklearn_version.base_version) >= parse_version("1.4"): - force_all_finite = False + if sklearn_version >= parse_version("1.6"): + 
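
The keyword juggling above tracks the scikit-learn 1.6 rename of `check_array`'s `force_all_finite` to `ensure_all_finite`. A minimal sketch of the dispatch, assuming only the public `check_array` API:

    import sklearn
    from sklearn.utils import check_array
    from sklearn.utils.fixes import parse_version

    sklearn_version = parse_version(parse_version(sklearn.__version__).base_version)
    if sklearn_version >= parse_version("1.6"):
        kwargs = {"ensure_all_finite": False}
    else:
        kwargs = {"force_all_finite": False}

    # NaN passes through because finiteness checking is disabled.
    check_array([[0.0, float("nan")]], **kwargs)
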
kwargs = {"ensure_all_finite": False} + else: + kwargs = {"force_all_finite": False} else: - force_all_finite = True + kwargs = {"force_all_finite": False} - X, y = self._validate_data( - X, - y, + X, y = validate_data( + self, + X=X, + y=y, multi_output=True, accept_sparse="csc", dtype=DTYPE, - force_all_finite=force_all_finite, + **kwargs, ) # TODO: remove when the minimum supported version of scikit-learn will be 1.4 - if parse_version(sklearn_version.base_version) >= parse_version("1.4"): + if sklearn_version >= parse_version("1.4"): # _compute_missing_values_in_feature_mask checks if X has missing values and # will raise an error if the underlying tree base estimator can't handle # missing values. Only the criterion is required to determine if the tree diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py index 766a6d399..802d726d4 100644 --- a/imblearn/metrics/pairwise.py +++ b/imblearn/metrics/pairwise.py @@ -14,6 +14,7 @@ from ..base import _ParamsValidationMixin from ..utils._param_validation import StrOptions +from ..utils.fixes import validate_data class ValueDifferenceMetric(_ParamsValidationMixin, BaseEstimator): @@ -148,7 +149,7 @@ def fit(self, X, y): """ self._validate_params() check_consistent_length(X, y) - X, y = self._validate_data(X, y, reset=True, dtype=np.int32) + X, y = validate_data(self, X=X, y=y, reset=True, dtype=np.int32) if isinstance(self.n_categories, str) and self.n_categories == "auto": # categories are expected to be encoded from 0 to n_categories - 1 @@ -207,11 +208,11 @@ def pairwise(self, X, Y=None): The VDM pairwise distance. """ check_is_fitted(self) - X = self._validate_data(X, reset=False, dtype=np.int32) + X = validate_data(self, X=X, reset=False, dtype=np.int32) n_samples_X = X.shape[0] if Y is not None: - Y = self._validate_data(Y, reset=False, dtype=np.int32) + Y = validate_data(self, Y=Y, reset=False, dtype=np.int32) n_samples_Y = Y.shape[0] else: n_samples_Y = n_samples_X diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py index 993788a42..71da059da 100644 --- a/imblearn/over_sampling/_random_over_sampler.py +++ b/imblearn/over_sampling/_random_over_sampler.py @@ -15,6 +15,7 @@ from ..utils import Substitution, check_target_type from ..utils._docstring import _random_state_docstring from ..utils._param_validation import Interval +from ..utils.fixes import _check_n_features, _check_feature_names from ..utils._validation import _check_X from .base import BaseOverSampler @@ -156,8 +157,8 @@ def __init__( def _check_X_y(self, X, y): y, binarize_y = check_target_type(y, indicate_one_vs_all=True) X = _check_X(X) - self._check_n_features(X, reset=True) - self._check_feature_names(X, reset=True) + _check_n_features(self, X, reset=True) + _check_feature_names(self, X, reset=True) return X, y, binarize_y def _fit_resample(self, X, y): diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py index 95e10b246..dc2e565ec 100644 --- a/imblearn/over_sampling/_smote/base.py +++ b/imblearn/over_sampling/_smote/base.py @@ -32,7 +32,7 @@ from ...utils._docstring import _n_jobs_docstring, _random_state_docstring from ...utils._param_validation import HasMethods, Interval, StrOptions from ...utils._validation import _check_X -from ...utils.fixes import _is_pandas_df, _mode +from ...utils.fixes import _check_n_features, _check_feature_names, _is_pandas_df, _mode, validate_data from ..base import BaseOverSampler sklearn_version = 
parse_version(sklearn.__version__).base_version @@ -601,8 +601,8 @@ def _check_X_y(self, X, y): """ y, binarize_y = check_target_type(y, indicate_one_vs_all=True) X = _check_X(X) - self._check_n_features(X, reset=True) - self._check_feature_names(X, reset=True) + _check_n_features(self, X, reset=True) + _check_feature_names(self, X, reset=True) return X, y, binarize_y def _validate_column_types(self, X): @@ -963,9 +963,10 @@ def __init__( def _check_X_y(self, X, y): """Check should accept strings and not sparse matrices.""" y, binarize_y = check_target_type(y, indicate_one_vs_all=True) - X, y = self._validate_data( - X, - y, + X, y = validate_data( + self, + X=X, + y=y, reset=True, dtype=None, accept_sparse=["csr", "csc"], diff --git a/imblearn/tests/test_common.py b/imblearn/tests/test_common.py index f04dd1d4c..4028f439a 100644 --- a/imblearn/tests/test_common.py +++ b/imblearn/tests/test_common.py @@ -1,4 +1,5 @@ """Common tests""" + # Authors: Guillaume Lemaitre # Christos Aridas # License: MIT @@ -10,8 +11,7 @@ import pytest from sklearn.base import clone from sklearn.exceptions import ConvergenceWarning -from sklearn.utils._testing import SkipTest, ignore_warnings, set_random_state -from sklearn.utils.estimator_checks import _construct_instance, _get_check_estimator_ids +from sklearn.utils._testing import ignore_warnings from sklearn.utils.estimator_checks import ( parametrize_with_checks as parametrize_with_checks_sklearn, ) @@ -25,6 +25,10 @@ parametrize_with_checks, ) from imblearn.utils.testing import all_estimators +from imblearn.utils._test_common.instance_generator import ( + _get_check_estimator_ids, + _tested_estimators, +) @pytest.mark.parametrize("name, Estimator", all_estimators()) @@ -34,22 +38,6 @@ def test_all_estimator_no_base_class(name, Estimator): assert not name.lower().startswith("base"), msg -def _tested_estimators(): - for name, Estimator in all_estimators(): - try: - estimator = _construct_instance(Estimator) - set_random_state(estimator) - except SkipTest: - continue - - if isinstance(estimator, NearMiss): - # For NearMiss, let's check the three algorithms - for version in (1, 2, 3): - yield clone(estimator).set_params(version=version) - else: - yield estimator - - @parametrize_with_checks_sklearn(list(_tested_estimators())) def test_estimators_compatibility_sklearn(estimator, check, request): _set_checking_parameters(estimator) diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py index 876195a6d..f914b7882 100644 --- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py @@ -8,6 +8,7 @@ from sklearn.utils import _safe_indexing, check_random_state from ...utils import Substitution, check_target_type +from ...utils.fixes import _check_n_features, _check_feature_names from ...utils._docstring import _random_state_docstring from ...utils._validation import _check_X from ..base import BaseUnderSampler @@ -99,8 +100,8 @@ def __init__( def _check_X_y(self, X, y): y, binarize_y = check_target_type(y, indicate_one_vs_all=True) X = _check_X(X) - self._check_n_features(X, reset=True) - self._check_feature_names(X, reset=True) + _check_n_features(self, X, reset=True) + _check_feature_names(self, X, reset=True) return X, y, binarize_y def _fit_resample(self, X, y): diff --git a/imblearn/utils/_tags.py b/imblearn/utils/_tags.py new file mode 100644 index 000000000..5a43b4d52 --- 
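
The test module now consumes the shared instance generator. A sketch of what the import yields, assuming the `INIT_PARAMS` entry for `NearMiss` defined below (one instance per parameter set):

    from imblearn.utils._test_common.instance_generator import _tested_estimators

    # Instances come pre-parametrized, e.g. three NearMiss objects, one per
    # `version`, as declared in INIT_PARAMS.
    names = [type(est).__name__ for est in _tested_estimators()]
    print(names.count("NearMiss"))  # 3
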
/dev/null +++ b/imblearn/utils/_tags.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass + +import sklearn +from sklearn.utils.fixes import parse_version + +sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) + +if sklearn_version >= parse_version("1.6"): + from sklearn.utils._tags import InputTags + + @dataclass + class InputTags(InputTags): + dataframe: bool = True diff --git a/imblearn/utils/_test_common/__init__.py b/imblearn/utils/_test_common/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/imblearn/utils/_test_common/instance_generator.py b/imblearn/utils/_test_common/instance_generator.py new file mode 100644 index 000000000..455427967 --- /dev/null +++ b/imblearn/utils/_test_common/instance_generator.py @@ -0,0 +1,138 @@ +# Authors: Guillaume Lemaitre +# Christos Aridas +# License: MIT + +import re +import warnings +from contextlib import suppress +from functools import partial +from inspect import isfunction + +from sklearn import clone, config_context +from sklearn.linear_model import LogisticRegression +from sklearn.exceptions import SkipTestWarning +from sklearn.utils._testing import SkipTest + +from imblearn.over_sampling import SMOTENC +from imblearn.pipeline import Pipeline +from imblearn.under_sampling import NearMiss, RandomUnderSampler +from imblearn.utils.testing import all_estimators + +# The following dictionary is to indicate constructor arguments suitable for the test +# suite, which uses very small datasets, and is intended to run rather quickly. +INIT_PARAMS = { + NearMiss: [dict(version=1), dict(version=2), dict(version=3)], + Pipeline: dict( + steps=[("sampler", RandomUnderSampler()), ("logistic", LogisticRegression())] + ), + SMOTENC: dict(categorical_features=[0]), +} + +# This dictionary stores parameters for specific checks. It also enables running the +# same check with multiple instances of the same estimator with different parameters. +# The special key "*" allows to apply the parameters to all checks. +# TODO(devtools): allow third-party developers to pass test specific params to checks +PER_ESTIMATOR_CHECK_PARAMS: dict = {} + +SKIPPED_ESTIMATORS = [] + + +def _tested_estimators(type_filter=None): + for _, Estimator in all_estimators(type_filter=type_filter): + with suppress(SkipTest): + for estimator in _construct_instances(Estimator): + yield estimator + + +def _construct_instances(Estimator): + """Construct Estimator instances if possible. + + If parameter sets in INIT_PARAMS are provided, use them. If there are a list + of parameter sets, return one instance for each set. + """ + if Estimator in SKIPPED_ESTIMATORS: + msg = f"Can't instantiate estimator {Estimator.__name__}" + # raise additional warning to be shown by pytest + warnings.warn(msg, SkipTestWarning) + raise SkipTest(msg) + + if Estimator in INIT_PARAMS: + param_sets = INIT_PARAMS[Estimator] + if not isinstance(param_sets, list): + param_sets = [param_sets] + for params in param_sets: + est = Estimator(**params) + yield est + else: + yield Estimator() + + +def _get_check_estimator_ids(obj): + """Create pytest ids for checks. + + When `obj` is an estimator, this returns the pprint version of the + estimator (with `print_changed_only=True`). When `obj` is a function, the + name of the function is returned with its keyword arguments. + + `_get_check_estimator_ids` is designed to be used as the `id` in + `pytest.mark.parametrize` where `check_estimator(..., generate_only=True)` + is yielding estimators and checks. 
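
As a sketch, `_construct_instances` expands an `INIT_PARAMS` entry into one instance per parameter set; for the `NearMiss` entry above:

    from imblearn.under_sampling import NearMiss
    from imblearn.utils._test_common.instance_generator import _construct_instances

    versions = [est.version for est in _construct_instances(NearMiss)]
    print(versions)  # [1, 2, 3]
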
+ + Parameters + ---------- + obj : estimator or function + Items generated by `check_estimator`. + + Returns + ------- + id : str or None + + See Also + -------- + check_estimator + """ + if isfunction(obj): + return obj.__name__ + if isinstance(obj, partial): + if not obj.keywords: + return obj.func.__name__ + kwstring = ",".join(["{}={}".format(k, v) for k, v in obj.keywords.items()]) + return "{}({})".format(obj.func.__name__, kwstring) + if hasattr(obj, "get_params"): + with config_context(print_changed_only=True): + return re.sub(r"\s", "", str(obj)) + + +def _yield_instances_for_check(check, estimator_orig): + """Yield instances for a check. + + For most estimators, this is a no-op. + + For estimators which have an entry in PER_ESTIMATOR_CHECK_PARAMS, this will yield + an estimator for each parameter set in PER_ESTIMATOR_CHECK_PARAMS[estimator]. + """ + # TODO(devtools): enable this behavior for third party estimators as well + if type(estimator_orig) not in PER_ESTIMATOR_CHECK_PARAMS: + yield estimator_orig + return + + check_params = PER_ESTIMATOR_CHECK_PARAMS[type(estimator_orig)] + + try: + check_name = check.__name__ + except AttributeError: + # partial tests + check_name = check.func.__name__ + + if check_name not in check_params: + yield estimator_orig + return + + param_set = check_params[check_name] + if isinstance(param_set, dict): + param_set = [param_set] + + for params in param_set: + estimator = clone(estimator_orig) + estimator.set_params(**params) + yield estimator diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index bb17cf015..66e637763 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -11,9 +11,11 @@ import numpy as np from scipy.sparse import issparse +import sklearn from sklearn.base import clone from sklearn.neighbors import NearestNeighbors from sklearn.utils import check_array, column_or_1d +from sklearn.utils.fixes import parse_version from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import _num_samples @@ -28,6 +30,8 @@ ) TARGET_KIND = ("binary", "multiclass", "multilabel-indicator") +sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) + class ArraysTransformer: """A class to convert sampler output arrays to their original types.""" @@ -643,6 +647,11 @@ def _check_X(X): ) if _is_pandas_df(X): return X + if sklearn_version >= parse_version("1.6"): + kwargs = {"ensure_all_finite": False} + else: + kwargs = {"force_all_finite": False} + return check_array( - X, dtype=None, accept_sparse=["csr", "csc"], force_all_finite=False + X, dtype=None, accept_sparse=["csr", "csc"], **kwargs ) diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index fc58c321c..83793a443 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -9,7 +9,8 @@ import traceback import warnings from collections import Counter -from functools import partial +from dataclasses import is_dataclass +from functools import partial, wraps import numpy as np import sklearn @@ -24,7 +25,6 @@ ) from sklearn.exceptions import SkipTestWarning from sklearn.preprocessing import StandardScaler, label_binarize -from sklearn.utils._tags import _safe_tags from sklearn.utils._testing import ( SkipTest, assert_allclose, @@ -33,10 +33,7 @@ set_random_state, ) from sklearn.utils.estimator_checks import ( - _enforce_estimator_tags_X, - _enforce_estimator_tags_y, - _get_check_estimator_ids, - _maybe_mark_xfail, + 
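
A sketch of the `_yield_instances_for_check` contract defined above: with no entry in `PER_ESTIMATOR_CHECK_PARAMS`, the original instance passes through untouched (`check_demo` is an illustrative stand-in for a real check):

    from imblearn.under_sampling import RandomUnderSampler
    from imblearn.utils._test_common.instance_generator import (
        _yield_instances_for_check,
    )

    def check_demo(name, estimator):  # stand-in for a real check
        pass

    sampler = RandomUnderSampler()
    assert list(_yield_instances_for_check(check_demo, sampler)) == [sampler]
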
_enforce_estimator_tags_X, _enforce_estimator_tags_y
 )
 from sklearn.utils.fixes import parse_version
 from sklearn.utils.multiclass import type_of_target
@@ -45,6 +42,10 @@
 from imblearn.over_sampling.base import BaseOverSampler
 from imblearn.under_sampling.base import BaseCleaningSampler, BaseUnderSampler
 from imblearn.utils._param_validation import generate_invalid_param_val, make_constraint
+from imblearn.utils._test_common.instance_generator import (
+    _get_check_estimator_ids, _yield_instances_for_check
+)
+from imblearn.utils.fixes import get_tags
 
 sklearn_version = parse_version(sklearn.__version__)
 
@@ -80,20 +81,32 @@ def _set_checking_parameters(estimator):
 
 
 def _yield_sampler_checks(sampler):
-    tags = sampler._get_tags()
+    tags = get_tags(sampler)
+    if is_dataclass(tags):
+        # scikit-learn >= 1.6
+        accept_sparse = tags.input_tags.sparse
+        accept_dataframe = tags.input_tags.dataframe
+        accept_string = tags.input_tags.string
+        allow_nan = tags.input_tags.allow_nan
+    else:
+        # scikit-learn < 1.6
+        accept_sparse = "sparse" in tags["X_types"]
+        accept_dataframe = "dataframe" in tags["X_types"]
+        accept_string = "string" in tags["X_types"]
+        allow_nan = tags["allow_nan"]
     yield check_target_type
     yield check_samplers_one_label
     yield check_samplers_fit
     yield check_samplers_fit_resample
     yield check_samplers_sampling_strategy_fit_resample
-    if "sparse" in tags["X_types"]:
+    if accept_sparse:
         yield check_samplers_sparse
-    if "dataframe" in tags["X_types"]:
+    if accept_dataframe:
         yield check_samplers_pandas
         yield check_samplers_pandas_sparse
-    if "string" in tags["X_types"]:
+    if accept_string:
        yield check_samplers_string
-    if tags["allow_nan"]:
+    if allow_nan:
         yield check_samplers_nan
     yield check_samplers_list
     yield check_samplers_multiclass_ova
@@ -112,10 +125,17 @@ def _yield_classifier_checks(classifier):
     yield check_classifiers_with_encoded_labels
 
 
-def _yield_all_checks(estimator):
+def _yield_all_checks(estimator, legacy=True):
     name = estimator.__class__.__name__
-    tags = estimator._get_tags()
-    if tags["_skip_test"]:
+    tags = get_tags(estimator)
+
+    if is_dataclass(tags):
+        # scikit-learn >= 1.6
+        skip_test = tags._skip_test
+    else:
+        # scikit-learn < 1.6
+        skip_test = tags["_skip_test"]
+    if skip_test:
         warnings.warn(
             f"Explicit SKIP via _skip_test tag for estimator {name}.",
             SkipTestWarning,
@@ -130,9 +150,139 @@ def _yield_all_checks(estimator):
         yield check
 
 
-def parametrize_with_checks(estimators):
+def _check_name(check):
+    if hasattr(check, "__wrapped__"):
+        return _check_name(check.__wrapped__)
+    return check.func.__name__ if isinstance(check, partial) else check.__name__
+
+
+def _maybe_mark(estimator, check, expected_failed_checks=None, mark=None, pytest=None):
+    """Mark the test as xfail or skip if needed.
+
+    Parameters
+    ----------
+    estimator : estimator object
+        Estimator instance for which to generate checks.
+    check : partial or callable
+        Check to be marked.
+    expected_failed_checks : dict[str, str], default=None
+        Dictionary of the form {check_name: reason} for checks that are expected to
+        fail.
+    mark : "xfail" or "skip" or None
+        Whether to mark the check as xfail or skip.
+    pytest : pytest module, default=None
+        Pytest module to use to mark the check. This is only needed if ``mark`` is
+        `"xfail"`. Note that one can run `check_estimator` without having `pytest`
+        installed. This is used in combination with `parametrize_with_checks` only.
+ """ + should_be_marked, reason = _should_be_skipped_or_marked( + estimator, check, expected_failed_checks + ) + if not should_be_marked or mark is None: + return estimator, check + + estimator_name = estimator.__class__.__name__ + if mark == "xfail": + return pytest.param(estimator, check, marks=pytest.mark.xfail(reason=reason)) + else: + + @wraps(check) + def wrapped(*args, **kwargs): + raise SkipTest( + f"Skipping {_check_name(check)} for {estimator_name}: {reason}" + ) + + return estimator, wrapped + + +def _should_be_skipped_or_marked( + estimator, check, expected_failed_checks: dict[str, str] | None = None +) -> tuple[bool, str]: + """Check whether a check should be skipped or marked as xfail. + + Parameters + ---------- + estimator : estimator object + Estimator instance for which to generate checks. + check : partial or callable + Check to be marked. + expected_failed_checks : dict[str, str], default=None + Dictionary of the form {check_name: reason} for checks that are expected to + fail. + + Returns + ------- + should_be_marked : bool + Whether the check should be marked as xfail or skipped. + reason : str + Reason for skipping the check. + """ + + expected_failed_checks = expected_failed_checks or {} + + check_name = _check_name(check) + if check_name in expected_failed_checks: + return True, expected_failed_checks[check_name] + + return False, "Check is not expected to fail" + + +def estimator_checks_generator( + estimator, *, legacy=True, expected_failed_checks=None, mark=None +): + """Iteratively yield all check callables for an estimator. + + .. versionadded:: 1.6 + + Parameters + ---------- + estimator : estimator object + Estimator instance for which to generate checks. + legacy : bool, default=True + Whether to include legacy checks. Over time we remove checks from this category + and move them into their specific category. + expected_failed_checks : dict[str, str], default=None + Dictionary of the form {check_name: reason} for checks that are expected to + fail. + mark : {"xfail", "skip"} or None, default=None + Whether to mark the checks that are expected to fail as + xfail(`pytest.mark.xfail`) or skip. Marking a test as "skip" is done via + wrapping the check in a function that raises a + :class:`~sklearn.exceptions.SkipTest` exception. + + Returns + ------- + estimator_checks_generator : generator + Generator that yields (estimator, check) tuples. + """ + if mark == "xfail": + import pytest + else: + pytest = None # type: ignore + + name = type(estimator).__name__ + for check in _yield_all_checks(estimator, legacy=legacy): + check_with_name = partial(check, name) + for check_instance in _yield_instances_for_check(check, estimator): + yield _maybe_mark( + check_instance, + check_with_name, + expected_failed_checks=expected_failed_checks, + mark=mark, + pytest=pytest, + ) + + +def parametrize_with_checks(estimators, *, legacy=True, expected_failed_checks=None): """Pytest specific decorator for parametrizing estimator checks. + Checks are categorised into the following groups: + + - API checks: a set of checks to ensure API compatibility with scikit-learn. + Refer to https://scikit-learn.org/dev/developers/develop.html a requirement of + scikit-learn estimators. + - legacy: a set of checks which gradually will be grouped into other categories. + The `id` of each check is set to be a pprint version of the estimator and the name of the check with its keyword arguments. 
This allows to use `pytest -k` to specify which tests to run:: @@ -144,10 +294,41 @@ def parametrize_with_checks(estimators): estimators : list of estimators instances Estimators to generated checks for. + .. versionchanged:: 0.24 + Passing a class was deprecated in version 0.23, and support for + classes was removed in 0.24. Pass an instance instead. + + .. versionadded:: 0.24 + + + legacy : bool, default=True + Whether to include legacy checks. Over time we remove checks from this category + and move them into their specific category. + + .. versionadded:: 1.6 + + expected_failed_checks : callable, default=None + A callable that takes an estimator as input and returns a dictionary of the + form:: + + { + "check_name": "my reason", + } + + Where `"check_name"` is the name of the check, and `"my reason"` is why + the check fails. These tests will be marked as xfail if the check fails. + + + .. versionadded:: 1.6 + Returns ------- decorator : `pytest.mark.parametrize` + See Also + -------- + check_estimator : Check if estimator adheres to scikit-learn conventions. + Examples -------- >>> from sklearn.utils.estimator_checks import parametrize_with_checks @@ -158,18 +339,29 @@ def parametrize_with_checks(estimators): ... DecisionTreeRegressor()]) ... def test_sklearn_compatible_estimator(estimator, check): ... check(estimator) + """ import pytest - def checks_generator(): + if any(isinstance(est, type) for est in estimators): + msg = ( + "Passing a class was deprecated in version 0.23 " + "and isn't supported anymore from 0.24." + "Please pass an instance instead." + ) + raise TypeError(msg) + + def _checks_generator(estimators, legacy, expected_failed_checks): for estimator in estimators: - name = type(estimator).__name__ - for check in _yield_all_checks(estimator): - check = partial(check, name) - yield _maybe_mark_xfail(estimator, check, pytest) + args = {"estimator": estimator, "legacy": legacy, "mark": "xfail"} + if callable(expected_failed_checks): + args["expected_failed_checks"] = expected_failed_checks(estimator) + yield from estimator_checks_generator(**args) return pytest.mark.parametrize( - "estimator, check", checks_generator(), ids=_get_check_estimator_ids + "estimator, check", + _checks_generator(estimators, legacy, expected_failed_checks), + ids=_get_check_estimator_ids, ) @@ -404,7 +596,12 @@ def check_samplers_sample_indices(name, sampler_orig): sampler = clone(sampler_orig) X, y = sample_dataset_generator() sampler.fit_resample(X, y) - sample_indices = sampler._get_tags().get("sample_indices", None) + tags = get_tags(sampler) + if is_dataclass(tags): + sample_indices = getattr(tags, "sample_indices", None) + else: + # scikit-learn < 1.6 + sample_indices = tags.get("sample_indices", None) if sample_indices: assert hasattr(sampler, "sample_indices_") is sample_indices else: @@ -529,14 +726,7 @@ def check_param_validation(name, estimator_orig): continue with raises(ValueError, match=match, err_msg=err_msg): - if any( - isinstance(X_type, str) and X_type.endswith("labels") - for X_type in _safe_tags(estimator, key="X_types") - ): - # The estimator is a label transformer and take only `y` - getattr(estimator, method)(y) # pragma: no cover - else: - getattr(estimator, method)(X, y) + getattr(estimator, method)(X, y) # Then, for constraints that are more than a type constraint, check that the # error is raised if param does match a valid type but does not match any valid @@ -557,14 +747,7 @@ def check_param_validation(name, estimator_orig): continue with raises(ValueError, 
match=match, err_msg=err_msg): - if any( - X_type.endswith("labels") - for X_type in _safe_tags(estimator, key="X_types") - ): - # The estimator is a label transformer and take only `y` - getattr(estimator, method)(y) # pragma: no cover - else: - getattr(estimator, method)(X, y) + getattr(estimator, method)(X, y) def check_dataframe_column_names_consistency(name, estimator_orig): @@ -575,12 +758,22 @@ def check_dataframe_column_names_consistency(name, estimator_orig): "pandas is not installed: not checking column name consistency for pandas" ) - tags = _safe_tags(estimator_orig) - is_supported_X_types = ( - "2darray" in tags["X_types"] or "categorical" in tags["X_types"] - ) + tags = get_tags(estimator_orig) - if not is_supported_X_types or tags["no_validation"]: + if is_dataclass(tags): + # scikit-learn >= 1.6 + is_supported_X_types = ( + tags.input_tags.two_d_array or tags.input_tags.categorical + ) + no_validation = tags.no_validation + else: + # scikit-learn < 1.6 + is_supported_X_types = ( + "2darray" in tags["X_types"] or "categorical" in tags["X_types"] + ) + no_validation = tags["no_validation"] + + if not is_supported_X_types or no_validation: return rng = np.random.RandomState(0) @@ -711,8 +904,18 @@ def check_dataframe_column_names_consistency(name, estimator_orig): def check_sampler_get_feature_names_out(name, sampler_orig): - tags = sampler_orig._get_tags() - if "2darray" not in tags["X_types"] or tags["no_validation"]: + tags = get_tags(sampler_orig) + + if is_dataclass(tags): + # scikit-learn >= 1.6 + two_d_array = tags.input_tags.two_d_array + no_validation = tags.no_validation + else: + # scikit-learn < 1.6 + two_d_array = "2darray" in tags["X_types"] + no_validation = tags["no_validation"] + + if not two_d_array or no_validation: return X, y = make_blobs( @@ -759,8 +962,16 @@ def check_sampler_get_feature_names_out_pandas(name, sampler_orig): "pandas is not installed: not checking column name consistency for pandas" ) - tags = sampler_orig._get_tags() - if "2darray" not in tags["X_types"] or tags["no_validation"]: + tags = get_tags(sampler_orig) + if is_dataclass(tags): + # scikit-learn >= 1.6 + two_d_array = tags.input_tags.two_d_array + no_validation = tags.no_validation + else: + # scikit-learn < 1.6 + two_d_array = "2darray" in tags["X_types"] + no_validation = tags["no_validation"] + if not two_d_array or no_validation: return X, y = make_blobs( diff --git a/imblearn/utils/fixes.py b/imblearn/utils/fixes.py index 9e4852566..94d1c03f8 100644 --- a/imblearn/utils/fixes.py +++ b/imblearn/utils/fixes.py @@ -15,7 +15,7 @@ from .._config import config_context, get_config sp_version = parse_version(scipy.__version__) -sklearn_version = parse_version(sklearn.__version__) +sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) # TODO: Remove when SciPy 1.9 is the minimum supported version @@ -136,3 +136,39 @@ def _is_pandas_df(X): return False return isinstance(X, pd.DataFrame) return False + + +if sklearn_version < parse_version("1.6"): + from sklearn.utils._tags import _safe_tags as get_tags +else: + from sklearn.utils import get_tags + +if sklearn_version < parse_version("1.6"): + def validate_data( + _estimator, + /, + X="no_validation", + y="no_validation", + reset=True, + validate_separately=False, + skip_check_array=False, + **check_params, + ): + return _estimator._validate_data( + X, y, reset, validate_separately, skip_check_array, **check_params + ) +else: + from sklearn.utils.validation import validate_data # type: ignore[no-redef] + + +if 
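
With the shim above, estimator code calls `validate_data` uniformly across scikit-learn versions. A minimal sketch (the `Demo` estimator is illustrative):

    import numpy as np
    from sklearn.base import BaseEstimator
    from imblearn.utils.fixes import validate_data

    class Demo(BaseEstimator):
        def fit(self, X, y=None):
            # Dispatches to self._validate_data on < 1.6, and to
            # sklearn.utils.validation.validate_data on >= 1.6.
            X = validate_data(self, X=X, reset=True)
            return self

    Demo().fit(np.array([[0.0], [1.0]]))
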
sklearn_version < parse_version("1.6"): + def _check_n_features(estimator, X, *, reset): + return estimator._check_n_features(X, reset=reset) +else: + from sklearn.utils.validation import _check_n_features # type: ignore[no-redef] + +if sklearn_version < parse_version("1.6"): + def _check_feature_names(estimator, X, *, reset): + return estimator._check_feature_names(X, reset=reset) +else: + from sklearn.utils.validation import _check_feature_names # type: ignore[no-redef] diff --git a/imblearn/utils/tests/test_estimator_checks.py b/imblearn/utils/tests/test_estimator_checks.py index 32e9c6723..0ebd495a1 100644 --- a/imblearn/utils/tests/test_estimator_checks.py +++ b/imblearn/utils/tests/test_estimator_checks.py @@ -15,6 +15,7 @@ check_samplers_string, check_target_type, ) +from imblearn.utils.fixes import validate_data class BaseBadSampler(BaseEstimator): @@ -47,7 +48,7 @@ class NotFittedSampler(BaseBadSampler): """Sampler without target checking.""" def fit(self, X, y): - X, y = self._validate_data(X, y) + X, y = validate_data(self, X=X, y=y) return self @@ -55,7 +56,7 @@ class NoAcceptingSparseSampler(BaseBadSampler): """Sampler which does not accept sparse matrix.""" def fit(self, X, y): - X, y = self._validate_data(X, y) + X, y = validate_data(self, X=X, y=y) self.sampling_strategy_ = "sampling_strategy_" return self @@ -72,9 +73,10 @@ def _fit_resample(self, X, y): class IndicesSampler(BaseOverSampler): def _check_X_y(self, X, y): y, binarize_y = target_check(y, indicate_one_vs_all=True) - X, y = self._validate_data( - X, - y, + X, y = validate_data( + self, + X=X, + y=y, reset=True, dtype=None, force_all_finite=False, From 3d25e4738793354140f48be266f9fcd92c5eaf5d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 11 Nov 2024 11:17:13 +0100 Subject: [PATCH 02/20] fix --- imblearn/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/imblearn/base.py b/imblearn/base.py index 18913667d..b47e47788 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -17,7 +17,6 @@ from .utils import check_sampling_strategy, check_target_type from .utils.fixes import validate_data from .utils._param_validation import validate_parameter_constraints -from .utils._tags import InputTags from .utils._validation import ArraysTransformer @@ -213,6 +212,8 @@ def _more_tags(self): def __sklearn_tags__(self): tags = super().__sklearn_tags__() + + from .utils._tags import InputTags tags.input_tags = InputTags() tags.input_tags.two_d_array = True tags.input_tags.sparse = True From eaa6873afcb5114dd40178076493ee19f75fdca0 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 11 Nov 2024 11:43:36 +0100 Subject: [PATCH 03/20] iter --- imblearn/utils/fixes.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/imblearn/utils/fixes.py b/imblearn/utils/fixes.py index 94d1c03f8..a38d29847 100644 --- a/imblearn/utils/fixes.py +++ b/imblearn/utils/fixes.py @@ -144,19 +144,8 @@ def _is_pandas_df(X): from sklearn.utils import get_tags if sklearn_version < parse_version("1.6"): - def validate_data( - _estimator, - /, - X="no_validation", - y="no_validation", - reset=True, - validate_separately=False, - skip_check_array=False, - **check_params, - ): - return _estimator._validate_data( - X, y, reset, validate_separately, skip_check_array, **check_params - ) + def validate_data(_estimator, **kwargs): + return _estimator._validate_data(**kwargs) else: from sklearn.utils.validation import validate_data # type: ignore[no-redef] From 
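
The `_check_n_features` shim behaves the same on both branches; a short sketch using a bare `BaseEstimator` for illustration:

    import numpy as np
    from sklearn.base import BaseEstimator
    from imblearn.utils.fixes import _check_n_features

    est = BaseEstimator()
    # Delegates to est._check_n_features on < 1.6, and to the
    # sklearn.utils.validation helper on >= 1.6.
    _check_n_features(est, np.zeros((2, 3)), reset=True)
    print(est.n_features_in_)  # 3
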
92924eb8af02d257dca4d3efa37519cb7c0370cb Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 11 Nov 2024 11:53:48 +0100 Subject: [PATCH 04/20] iter --- imblearn/metrics/pairwise.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py index 802d726d4..6f840ed6b 100644 --- a/imblearn/metrics/pairwise.py +++ b/imblearn/metrics/pairwise.py @@ -10,7 +10,7 @@ from sklearn.base import BaseEstimator from sklearn.utils import check_consistent_length from sklearn.utils.multiclass import unique_labels -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_array, check_is_fitted from ..base import _ParamsValidationMixin from ..utils._param_validation import StrOptions @@ -208,11 +208,11 @@ def pairwise(self, X, Y=None): The VDM pairwise distance. """ check_is_fitted(self) - X = validate_data(self, X=X, reset=False, dtype=np.int32) + X = check_array(X, dtype=np.int32) n_samples_X = X.shape[0] if Y is not None: - Y = validate_data(self, Y=Y, reset=False, dtype=np.int32) + Y = check_array(Y, dtype=np.int32) n_samples_Y = Y.shape[0] else: n_samples_Y = n_samples_X From 176b61435db8bfef07acd799d692989d985731a8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 11 Nov 2024 11:59:07 +0100 Subject: [PATCH 05/20] fix --- imblearn/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/base.py b/imblearn/base.py index b47e47788..cb4172f7d 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -20,7 +20,7 @@ from .utils._validation import ArraysTransformer -def check_version(): +def check_version(estimator): return parse_version( parse_version(sklearn.__version__).base_version ) >= parse_version("1.6") From fa206e4047523c18719308d805a02e868d78b0e8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 11 Nov 2024 12:03:52 +0100 Subject: [PATCH 06/20] real fix --- imblearn/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/base.py b/imblearn/base.py index cb4172f7d..acdce49b7 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -23,7 +23,7 @@ def check_version(estimator): return parse_version( parse_version(sklearn.__version__).base_version - ) >= parse_version("1.6") + ) < parse_version("1.6") class _ParamsValidationMixin: From c1514dc6a995e4173d3649bd1ea267c75d01d9f5 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 11 Nov 2024 20:05:40 +0100 Subject: [PATCH 07/20] fix _more_tags --- imblearn/base.py | 13 ++---- imblearn/ensemble/_bagging.py | 3 +- imblearn/ensemble/_easy_ensemble.py | 3 +- imblearn/ensemble/_forest.py | 4 +- imblearn/metrics/pairwise.py | 4 +- imblearn/over_sampling/_adasyn.py | 3 ++ .../over_sampling/_random_over_sampler.py | 4 +- imblearn/over_sampling/_smote/base.py | 11 ++++- .../_cluster_centroids.py | 3 ++ .../_condensed_nearest_neighbour.py | 3 ++ .../_edited_nearest_neighbours.py | 6 ++- .../_instance_hardness_threshold.py | 3 ++ .../_prototype_selection/_nearmiss.py | 3 ++ .../_neighbourhood_cleaning_rule.py | 3 ++ .../_one_sided_selection.py | 3 ++ .../_random_under_sampler.py | 8 +++- .../_prototype_selection/_tomek_links.py | 3 ++ imblearn/utils/fixes.py | 41 +++++++++++++++++++ 18 files changed, 103 insertions(+), 18 deletions(-) diff --git a/imblearn/base.py b/imblearn/base.py index acdce49b7..5bc784f3f 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -12,20 +12,12 @@ from sklearn.preprocessing import label_binarize from sklearn.utils.metaestimators import 
available_if from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.fixes import parse_version from .utils import check_sampling_strategy, check_target_type -from .utils.fixes import validate_data +from .utils.fixes import check_version_package, validate_data from .utils._param_validation import validate_parameter_constraints from .utils._validation import ArraysTransformer - -def check_version(estimator): - return parse_version( - parse_version(sklearn.__version__).base_version - ) < parse_version("1.6") - - class _ParamsValidationMixin: """Mixin class to validate parameters.""" @@ -206,10 +198,11 @@ def fit_resample(self, X, y): self._validate_params() return super().fit_resample(X, y) - @available_if(check_version) + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"X_types": ["2darray", "sparse", "dataframe"]} + @available_if(check_version_package("sklearn", ">=", "1.6")) def __sklearn_tags__(self): tags = super().__sklearn_tags__() diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py index 0ce2526ae..974759fa3 100644 --- a/imblearn/ensemble/_bagging.py +++ b/imblearn/ensemble/_bagging.py @@ -26,7 +26,7 @@ from ..utils import Substitution, check_sampling_strategy, check_target_type from ..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import HasMethods, Interval, StrOptions -from ..utils.fixes import _fit_context, validate_data +from ..utils.fixes import _fit_context, check_version_package, validate_data from ._common import _bagging_parameter_constraints, _estimator_has sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) @@ -420,6 +420,7 @@ def base_estimator_(self): ) raise error + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): tags = super()._more_tags() tags_key = "_xfail_checks" diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index 78b1e842a..cea77690f 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -26,7 +26,7 @@ from ..utils import Substitution, check_sampling_strategy, check_target_type from ..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import Interval, StrOptions -from ..utils.fixes import _fit_context, get_tags, validate_data +from ..utils.fixes import _fit_context, check_version_package, get_tags, validate_data from ._common import _bagging_parameter_constraints, _estimator_has MAX_INT = np.iinfo(np.int32).max @@ -354,6 +354,7 @@ def _get_estimator(self): return self.estimator # TODO: remove when minimum supported version of scikit-learn is 1.5 + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): # This code should not be called for scikit-learn >= 1.6 # Therefore, get_tags corresponds to _safe_tags that returns a dict diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py index 5f1b700bc..c2904e7e9 100644 --- a/imblearn/ensemble/_forest.py +++ b/imblearn/ensemble/_forest.py @@ -24,6 +24,7 @@ from sklearn.tree import DecisionTreeClassifier from sklearn.utils import _safe_indexing, check_random_state from sklearn.utils.fixes import parse_version +from sklearn.utils.metaestimators import available_if from sklearn.utils.multiclass import type_of_target from sklearn.utils.parallel import Parallel, delayed from sklearn.utils.validation import _check_sample_weight @@ -35,7 +36,7 @@ from 
..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import Hidden, Interval, StrOptions from ..utils._validation import check_sampling_strategy -from ..utils.fixes import _fit_context, validate_data +from ..utils.fixes import _fit_context, check_version_package, validate_data from ._common import _random_forest_classifier_parameter_constraints MAX_INT = np.iinfo(np.int32).max @@ -884,5 +885,6 @@ def _compute_oob_predictions(self, X, y): return oob_pred + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"multioutput": False, "multilabel": False} diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py index 6f840ed6b..12e221935 100644 --- a/imblearn/metrics/pairwise.py +++ b/imblearn/metrics/pairwise.py @@ -9,12 +9,13 @@ from scipy.spatial import distance_matrix from sklearn.base import BaseEstimator from sklearn.utils import check_consistent_length +from sklearn.utils.metaestimators import available_if from sklearn.utils.multiclass import unique_labels from sklearn.utils.validation import check_array, check_is_fitted from ..base import _ParamsValidationMixin from ..utils._param_validation import StrOptions -from ..utils.fixes import validate_data +from ..utils.fixes import check_version_package, validate_data class ValueDifferenceMetric(_ParamsValidationMixin, BaseEstimator): @@ -229,6 +230,7 @@ def pairwise(self, X, Y=None): ) return distance + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return { "requires_positive_X": True, # X should be encoded with OrdinalEncoder diff --git a/imblearn/over_sampling/_adasyn.py b/imblearn/over_sampling/_adasyn.py index 2dffed228..afc4e7417 100644 --- a/imblearn/over_sampling/_adasyn.py +++ b/imblearn/over_sampling/_adasyn.py @@ -10,8 +10,10 @@ import numpy as np from scipy import sparse from sklearn.utils import _safe_indexing, check_random_state +from sklearn.utils.metaestimators import available_if from ..utils import Substitution, check_neighbors_object +from ..utils.fixes import check_version_package from ..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import HasMethods, Interval from .base import BaseOverSampler @@ -229,6 +231,7 @@ def _fit_resample(self, X, y): return X_resampled, y_resampled + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return { "X_types": ["2darray"], diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py index 71da059da..eca331986 100644 --- a/imblearn/over_sampling/_random_over_sampler.py +++ b/imblearn/over_sampling/_random_over_sampler.py @@ -10,12 +10,13 @@ import numpy as np from scipy import sparse from sklearn.utils import _safe_indexing, check_array, check_random_state +from sklearn.utils.metaestimators import available_if from sklearn.utils.sparsefuncs import mean_variance_axis from ..utils import Substitution, check_target_type from ..utils._docstring import _random_state_docstring from ..utils._param_validation import Interval -from ..utils.fixes import _check_n_features, _check_feature_names +from ..utils.fixes import _check_n_features, _check_feature_names, check_version_package from ..utils._validation import _check_X from .base import BaseOverSampler @@ -250,6 +251,7 @@ def _fit_resample(self, X, y): return X_resampled, y_resampled + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return { 
"X_types": ["2darray", "string", "sparse", "dataframe"], diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py index dc2e565ec..c7decb9da 100644 --- a/imblearn/over_sampling/_smote/base.py +++ b/imblearn/over_sampling/_smote/base.py @@ -21,6 +21,7 @@ check_array, check_random_state, ) +from sklearn.utils.metaestimators import available_if from sklearn.utils.fixes import parse_version from sklearn.utils.sparsefuncs_fast import ( csr_mean_variance_axis0, @@ -32,7 +33,14 @@ from ...utils._docstring import _n_jobs_docstring, _random_state_docstring from ...utils._param_validation import HasMethods, Interval, StrOptions from ...utils._validation import _check_X -from ...utils.fixes import _check_n_features, _check_feature_names, _is_pandas_df, _mode, validate_data +from ...utils.fixes import ( + _check_n_features, + _check_feature_names, + _is_pandas_df, + _mode, + check_version_package, + validate_data, +) from ..base import BaseOverSampler sklearn_version = parse_version(sklearn.__version__).base_version @@ -1062,5 +1070,6 @@ def _fit_resample(self, X, y): else: return X_resampled, y_resampled + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"X_types": ["2darray", "dataframe", "string"]} diff --git a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py index 5e2ca3a82..d8cdde1ce 100644 --- a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py +++ b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py @@ -12,10 +12,12 @@ from sklearn.cluster import KMeans from sklearn.neighbors import NearestNeighbors from sklearn.utils import _safe_indexing +from sklearn.utils.metaestimators import available_if from ...utils import Substitution from ...utils._docstring import _random_state_docstring from ...utils._param_validation import HasMethods, StrOptions +from ...utils.fixes import check_version_package from ..base import BaseUnderSampler VOTING_KIND = ("auto", "hard", "soft") @@ -201,5 +203,6 @@ def _fit_resample(self, X, y): return X_resampled, np.array(y_resampled, dtype=y.dtype) + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": False} diff --git a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py index fe49f1707..803fa6858 100644 --- a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py @@ -14,9 +14,11 @@ from sklearn.base import clone from sklearn.neighbors import KNeighborsClassifier from sklearn.utils import _safe_indexing, check_random_state +from sklearn.utils.metaestimators import available_if from ...utils import Substitution from ...utils._docstring import _n_jobs_docstring, _random_state_docstring +from ...utils.fixes import check_version_package from ...utils._param_validation import HasMethods, Interval from ..base import BaseCleaningSampler @@ -259,5 +261,6 @@ def estimator_(self): ) return self.estimators_[-1] + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} diff --git a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py index 38abd4bed..6133dae66 100644 --- 
a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py @@ -11,11 +11,12 @@ import numpy as np from sklearn.utils import _safe_indexing +from sklearn.utils.metaestimators import available_if from ...utils import Substitution, check_neighbors_object from ...utils._docstring import _n_jobs_docstring from ...utils._param_validation import HasMethods, Interval, StrOptions -from ...utils.fixes import _mode +from ...utils.fixes import _mode, check_version_package from ..base import BaseCleaningSampler SEL_KIND = ("all", "mode") @@ -189,6 +190,7 @@ def _fit_resample(self, X, y): return _safe_indexing(X, idx_under), _safe_indexing(y, idx_under) + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} @@ -410,6 +412,7 @@ def _fit_resample(self, X, y): return X_resampled, y_resampled + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} @@ -619,5 +622,6 @@ def _fit_resample(self, X, y): return X_resampled, y_resampled + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py index dac3f3c33..94977784b 100644 --- a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py @@ -15,9 +15,11 @@ from sklearn.ensemble._base import _set_random_states from sklearn.model_selection import StratifiedKFold, cross_val_predict from sklearn.utils import _safe_indexing, check_random_state +from sklearn.utils.metaestimators import available_if from ...utils import Substitution from ...utils._docstring import _n_jobs_docstring, _random_state_docstring +from ...utils.fixes import check_version_package from ...utils._param_validation import HasMethods from ..base import BaseUnderSampler @@ -200,5 +202,6 @@ def _fit_resample(self, X, y): return _safe_indexing(X, idx_under), _safe_indexing(y, idx_under) + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} diff --git a/imblearn/under_sampling/_prototype_selection/_nearmiss.py b/imblearn/under_sampling/_prototype_selection/_nearmiss.py index 70f647fa5..0817694da 100644 --- a/imblearn/under_sampling/_prototype_selection/_nearmiss.py +++ b/imblearn/under_sampling/_prototype_selection/_nearmiss.py @@ -10,8 +10,10 @@ import numpy as np from sklearn.utils import _safe_indexing +from sklearn.utils.metaestimators import available_if from ...utils import Substitution, check_neighbors_object +from ...utils.fixes import check_version_package from ...utils._docstring import _n_jobs_docstring from ...utils._param_validation import HasMethods, Interval from ..base import BaseUnderSampler @@ -303,6 +305,7 @@ def _fit_resample(self, X, y): return _safe_indexing(X, idx_under), _safe_indexing(y, idx_under) # fmt: off + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return { "sample_indices": True, diff --git a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py index 7277a3c99..9d0f1831b 100644 --- 
a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py @@ -12,10 +12,12 @@ from sklearn.base import clone from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors from sklearn.utils import _safe_indexing +from sklearn.utils.metaestimators import available_if from ...utils import Substitution from ...utils._docstring import _n_jobs_docstring from ...utils._param_validation import HasMethods, Hidden, Interval, StrOptions +from ...utils.fixes import check_version_package from ..base import BaseCleaningSampler from ._edited_nearest_neighbours import EditedNearestNeighbours @@ -256,5 +258,6 @@ def _fit_resample(self, X, y): _safe_indexing(y, self.sample_indices_), ) + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} diff --git a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py index bfd3449bd..f02b25778 100644 --- a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py +++ b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py @@ -12,8 +12,10 @@ from sklearn.base import clone from sklearn.neighbors import KNeighborsClassifier from sklearn.utils import _safe_indexing, check_random_state +from sklearn.utils.metaestimators import available_if from ...utils import Substitution +from ...utils.fixes import check_version_package from ...utils._docstring import _n_jobs_docstring, _random_state_docstring from ...utils._param_validation import HasMethods, Interval from ..base import BaseCleaningSampler @@ -225,5 +227,6 @@ def estimator_(self): ) return self.estimators_[-1] + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py index f914b7882..7dcfbe4fb 100644 --- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py @@ -6,9 +6,14 @@ import numpy as np from sklearn.utils import _safe_indexing, check_random_state +from sklearn.utils.metaestimators import available_if from ...utils import Substitution, check_target_type -from ...utils.fixes import _check_n_features, _check_feature_names +from ...utils.fixes import ( + _check_n_features, + _check_feature_names, + check_version_package, +) from ...utils._docstring import _random_state_docstring from ...utils._validation import _check_X from ..base import BaseUnderSampler @@ -132,6 +137,7 @@ def _fit_resample(self, X, y): return _safe_indexing(X, idx_under), _safe_indexing(y, idx_under) + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return { "X_types": ["2darray", "string", "sparse", "dataframe"], diff --git a/imblearn/under_sampling/_prototype_selection/_tomek_links.py b/imblearn/under_sampling/_prototype_selection/_tomek_links.py index b0f954959..254d482c9 100644 --- a/imblearn/under_sampling/_prototype_selection/_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/_tomek_links.py @@ -10,8 +10,10 @@ import numpy as np from sklearn.neighbors import NearestNeighbors from sklearn.utils import _safe_indexing +from sklearn.utils.metaestimators import available_if from ...utils import Substitution +from 
...utils.fixes import check_version_package from ...utils._docstring import _n_jobs_docstring from ..base import BaseCleaningSampler @@ -156,5 +158,6 @@ def _fit_resample(self, X, y): _safe_indexing(y, self.sample_indices_), ) + @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} diff --git a/imblearn/utils/fixes.py b/imblearn/utils/fixes.py index a38d29847..d960f5374 100644 --- a/imblearn/utils/fixes.py +++ b/imblearn/utils/fixes.py @@ -161,3 +161,44 @@ def _check_feature_names(estimator, X, *, reset): return estimator._check_feature_names(X, reset=reset) else: from sklearn.utils.validation import _check_feature_names # type: ignore[no-redef] + + +def check_version_package(package, constraint, version, /): + """Create a function to check package version against a constraint. + + Parameters + ---------- + package : str + The package name to check version for. + constraint : {"<", "<=", ">", ">="} + The version constraint. + version : str + The version to compare against. + + Returns + ------- + callable + A function that takes an estimator and returns bool. + """ + operators = { + "<": lambda x, y: x < y, + "<=": lambda x, y: x <= y, + ">": lambda x, y: x > y, + ">=": lambda x, y: x >= y + } + + if constraint not in operators: + raise ValueError(f"Invalid constraint: {constraint}") + + op = operators[constraint] + parsed_version = parse_version(version) + + def check_version(estimator): + try: + pkg = __import__(package) + pkg_version = parse_version(parse_version(pkg.__version__).base_version) + return op(pkg_version, parsed_version) + except (ImportError, AttributeError): + return False + + return check_version From acb8234cd21c517422257db9178a2237446047d3 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 11:44:27 +0100 Subject: [PATCH 08/20] iter --- imblearn/base.py | 16 +++- imblearn/ensemble/_bagging.py | 6 +- imblearn/ensemble/_easy_ensemble.py | 6 +- imblearn/ensemble/_forest.py | 9 +- imblearn/metrics/tests/test_classification.py | 12 ++- .../over_sampling/_random_over_sampler.py | 8 ++ imblearn/over_sampling/_smote/base.py | 18 ++++ .../_condensed_nearest_neighbour.py | 6 ++ .../_edited_nearest_neighbours.py | 18 ++++ .../_instance_hardness_threshold.py | 6 ++ .../_prototype_selection/_nearmiss.py | 6 ++ .../_neighbourhood_cleaning_rule.py | 6 ++ .../_one_sided_selection.py | 6 ++ .../_random_under_sampler.py | 8 ++ .../_prototype_selection/_tomek_links.py | 6 ++ imblearn/utils/_tags.py | 94 ++++++++++++++++++- .../utils/_test_common/instance_generator.py | 47 +++++++++- imblearn/utils/_validation.py | 6 +- imblearn/utils/estimator_checks.py | 2 +- imblearn/utils/fixes.py | 11 ++- imblearn/utils/tests/test_estimator_checks.py | 2 +- 21 files changed, 257 insertions(+), 42 deletions(-) diff --git a/imblearn/base.py b/imblearn/base.py index 5bc784f3f..d31a898a0 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -37,7 +37,7 @@ class attribute, which is a dictionary `param_name: list of constraints`. See ) -class SamplerMixin(_ParamsValidationMixin, BaseEstimator, metaclass=ABCMeta): +class SamplerMixin(_ParamsValidationMixin, metaclass=ABCMeta): """Mixin class for samplers with abstract method. Warning: This class should not be used directly. Use the derive classes @@ -135,7 +135,7 @@ def _fit_resample(self, X, y): pass -class BaseSampler(SamplerMixin, OneToOneFeatureMixin): +class BaseSampler(SamplerMixin, OneToOneFeatureMixin, BaseEstimator): """Base class for sampling algorithms. 
Warning: This class should not be used directly. Use the derive classes @@ -204,9 +204,15 @@ def _more_tags(self): @available_if(check_version_package("sklearn", ">=", "1.6")) def __sklearn_tags__(self): - tags = super().__sklearn_tags__() - - from .utils._tags import InputTags + from .utils._tags import Tags, SamplerTags, TargetTags, InputTags + tags = Tags( + estimator_type="sampler", + target_tags=TargetTags(required=True), + transformer_tags=None, + regressor_tags=None, + classifier_tags=None, + sampler_tags=SamplerTags(), + ) tags.input_tags = InputTags() tags.input_tags.two_d_array = True tags.input_tags.sparse = True diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py index 974759fa3..59bd7816f 100644 --- a/imblearn/ensemble/_bagging.py +++ b/imblearn/ensemble/_bagging.py @@ -382,17 +382,13 @@ def decision_function(self, X): check_is_fitted(self) # Check data - if sklearn_version < parse_version("1.6"): - kwargs = {"force_all_finite": False} - else: - kwargs = {"ensure_all_finite": False} X = validate_data( self, X=X, accept_sparse=["csr", "csc"], dtype=None, reset=False, - **kwargs + ensure_all_finite=False, ) # Parallel loop diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index cea77690f..35a7df2cd 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -310,17 +310,13 @@ def decision_function(self, X): check_is_fitted(self) # Check data - if sklearn_version < parse_version("1.6"): - kwargs = {"force_all_finite": False} - else: - kwargs = {"ensure_all_finite": False} X = validate_data( self, X=X, accept_sparse=["csr", "csc"], dtype=None, reset=False, - **kwargs, + ensure_all_finite=False, ) # Parallel loop diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py index c2904e7e9..6b0fb686e 100644 --- a/imblearn/ensemble/_forest.py +++ b/imblearn/ensemble/_forest.py @@ -598,12 +598,9 @@ def fit(self, X, y, sample_weight=None): # TODO: remove when the minimum supported version of scipy will be 1.4 # Support for missing values if parse_version(sklearn_version.base_version) >= parse_version("1.4"): - if sklearn_version >= parse_version("1.6"): - kwargs = {"ensure_all_finite": False} - else: - kwargs = {"force_all_finite": False} + ensure_all_finite = False else: - kwargs = {"force_all_finite": False} + ensure_all_finite = False X, y = validate_data( self, @@ -612,7 +609,7 @@ def fit(self, X, y, sample_weight=None): multi_output=True, accept_sparse="csc", dtype=DTYPE, - **kwargs, + ensure_all_finite=ensure_all_finite, ) # TODO: remove when the minimum supported version of scikit-learn will be 1.4 diff --git a/imblearn/metrics/tests/test_classification.py b/imblearn/metrics/tests/test_classification.py index 8169cee81..ec579e52d 100644 --- a/imblearn/metrics/tests/test_classification.py +++ b/imblearn/metrics/tests/test_classification.py @@ -4,6 +4,7 @@ # Christos Aridas # License: MIT +import warnings from functools import partial import numpy as np @@ -23,7 +24,6 @@ from sklearn.utils._testing import ( assert_allclose, assert_array_equal, - assert_no_warnings, ) from sklearn.utils.validation import check_random_state @@ -105,11 +105,13 @@ def test_sensitivity_specificity_score_binary(): # binary class case the score is the value of the measure for the positive # class (e.g. label == 1). This is deprecated for average != 'binary'. 
for kwargs in ({}, {"average": "binary"}): - sen = assert_no_warnings(sensitivity_score, y_true, y_pred, **kwargs) - assert sen == pytest.approx(0.68, rel=R_TOL) + with warnings.catch_warnings(): + warnings.simplefilter("error") + sen = sensitivity_score(y_true, y_pred, **kwargs) + assert sen == pytest.approx(0.68, rel=R_TOL) - spe = assert_no_warnings(specificity_score, y_true, y_pred, **kwargs) - assert spe == pytest.approx(0.88, rel=R_TOL) + spe = specificity_score(y_true, y_pred, **kwargs) + assert spe == pytest.approx(0.88, rel=R_TOL) @pytest.mark.filterwarnings("ignore:Specificity is ill-defined") diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py index eca331986..5495491d7 100644 --- a/imblearn/over_sampling/_random_over_sampler.py +++ b/imblearn/over_sampling/_random_over_sampler.py @@ -261,3 +261,11 @@ def _more_tags(self): "check_complex_data": "Robust to this type of data.", }, } + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = True + tags.input_tags.string = True + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py index c7decb9da..261a58d87 100644 --- a/imblearn/over_sampling/_smote/base.py +++ b/imblearn/over_sampling/_smote/base.py @@ -829,6 +829,17 @@ def ohe_(self): ) return self.categorical_encoder_ + @available_if(check_version_package("sklearn", "<", "1.6")) + def _more_tags(self): + return {"X_types": ["2darray", "dataframe", "string"]} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.sparse = False + tags.input_tags.string = True + return tags + @Substitution( sampling_strategy=BaseOverSampler._sampling_strategy_docstring, @@ -1073,3 +1084,10 @@ def _fit_resample(self, X, y): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"X_types": ["2darray", "dataframe", "string"]} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.sparse = False + tags.input_tags.string = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py index 803fa6858..987fe0541 100644 --- a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py @@ -264,3 +264,9 @@ def estimator_(self): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py index 6133dae66..b31faf440 100644 --- a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py @@ -194,6 +194,12 @@ def _fit_resample(self, X, y): def _more_tags(self): return {"sample_indices": True} + 
@available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags + @Substitution( sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring, @@ -416,6 +422,12 @@ def _fit_resample(self, X, y): def _more_tags(self): return {"sample_indices": True} + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags + @Substitution( sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring, @@ -625,3 +637,9 @@ def _fit_resample(self, X, y): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py index 94977784b..02a2ab38b 100644 --- a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py @@ -205,3 +205,9 @@ def _fit_resample(self, X, y): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_nearmiss.py b/imblearn/under_sampling/_prototype_selection/_nearmiss.py index 0817694da..5b9734e04 100644 --- a/imblearn/under_sampling/_prototype_selection/_nearmiss.py +++ b/imblearn/under_sampling/_prototype_selection/_nearmiss.py @@ -315,3 +315,9 @@ def _more_tags(self): } } # fmt: on + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py index 9d0f1831b..53395a9e8 100644 --- a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py @@ -261,3 +261,9 @@ def _fit_resample(self, X, y): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py index f02b25778..72e29dafe 100644 --- a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py +++ b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py @@ -230,3 +230,9 @@ def estimator_(self): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def 
__sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py index 7dcfbe4fb..1b0130eb4 100644 --- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py @@ -147,3 +147,11 @@ def _more_tags(self): "check_complex_data": "Robust to this type of data.", }, } + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = True + tags.input_tags.string = True + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/under_sampling/_prototype_selection/_tomek_links.py b/imblearn/under_sampling/_prototype_selection/_tomek_links.py index 254d482c9..b21ab9190 100644 --- a/imblearn/under_sampling/_prototype_selection/_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/_tomek_links.py @@ -161,3 +161,9 @@ def _fit_resample(self, X, y): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): return {"sample_indices": True} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.sampler_tags.sample_indices = True + return tags diff --git a/imblearn/utils/_tags.py b/imblearn/utils/_tags.py index 5a43b4d52..7d59e73a8 100644 --- a/imblearn/utils/_tags.py +++ b/imblearn/utils/_tags.py @@ -1,13 +1,101 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field import sklearn from sklearn.utils.fixes import parse_version +from .fixes import _dataclass_args sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) if sklearn_version >= parse_version("1.6"): - from sklearn.utils._tags import InputTags + from sklearn.utils._tags import ( + TargetTags, + TransformerTags, + ClassifierTags, + RegressorTags, + InputTags, + ) - @dataclass + @dataclass(**_dataclass_args()) class InputTags(InputTags): dataframe: bool = True + + @dataclass(**_dataclass_args()) + class SamplerTags: + """Tags for the sampler. + + Parameters + ---------- + sample_indices : bool, default=False + Whether the sampler returns the indices of the samples that were + selected. + """ + + sample_indices: bool = False + + +@dataclass(**_dataclass_args()) +class Tags: + """Tags for the estimator. + + See :ref:`estimator_tags` for more information. + + Parameters + ---------- + estimator_type : str or None + The type of the estimator. Can be one of: + - "classifier" + - "regressor" + - "transformer" + - "clusterer" + - "outlier_detector" + - "density_estimator" + + target_tags : :class:`TargetTags` + The target(y) tags. + + transformer_tags : :class:`TransformerTags` or None + The transformer tags. + + classifier_tags : :class:`ClassifierTags` or None + The classifier tags. + + regressor_tags : :class:`RegressorTags` or None + The regressor tags. + + sampler_tags : :class:`SamplerTags` or None + The sampler tags. + + array_api_support : bool, default=False + Whether the estimator supports Array API compatible inputs. + + no_validation : bool, default=False + Whether the estimator skips input-validation. This is only meant for + stateless and dummy transformers! 
+ + non_deterministic : bool, default=False + Whether the estimator is not deterministic given a fixed ``random_state``. + + requires_fit : bool, default=True + Whether the estimator requires to be fitted before calling one of + `transform`, `predict`, `predict_proba`, or `decision_function`. + + _skip_test : bool, default=False + Whether to skip common tests entirely. Don't use this unless + you have a *very good* reason. + + input_tags : :class:`InputTags` + The input data(X) tags. + """ + + estimator_type: str | None + target_tags: TargetTags + transformer_tags: TransformerTags | None + classifier_tags: ClassifierTags | None + regressor_tags: RegressorTags | None + sampler_tags: SamplerTags | None + array_api_support: bool = False + no_validation: bool = False + non_deterministic: bool = False + requires_fit: bool = True + _skip_test: bool = False + input_tags: InputTags = field(default_factory=InputTags) diff --git a/imblearn/utils/_test_common/instance_generator.py b/imblearn/utils/_test_common/instance_generator.py index 455427967..0d7b8fbea 100644 --- a/imblearn/utils/_test_common/instance_generator.py +++ b/imblearn/utils/_test_common/instance_generator.py @@ -13,19 +13,56 @@ from sklearn.exceptions import SkipTestWarning from sklearn.utils._testing import SkipTest -from imblearn.over_sampling import SMOTENC +from imblearn.combine import SMOTEENN, SMOTETomek +from imblearn.ensemble import BalancedBaggingClassifier, BalancedRandomForestClassifier +from imblearn.over_sampling import ( + ADASYN, + BorderlineSMOTE, + KMeansSMOTE, + RandomOverSampler, + SMOTE, + SMOTEN, + SMOTENC, + SVMSMOTE, +) from imblearn.pipeline import Pipeline -from imblearn.under_sampling import NearMiss, RandomUnderSampler +from imblearn.under_sampling import ( + ClusterCentroids, + CondensedNearestNeighbour, + InstanceHardnessThreshold, + NearMiss, + OneSidedSelection, + RandomUnderSampler, +) from imblearn.utils.testing import all_estimators # The following dictionary is to indicate constructor arguments suitable for the test # suite, which uses very small datasets, and is intended to run rather quickly. INIT_PARAMS = { - NearMiss: [dict(version=1), dict(version=2), dict(version=3)], + # estimator + BalancedBaggingClassifier: dict(random_state=42), + BalancedRandomForestClassifier: dict(random_state=42), Pipeline: dict( steps=[("sampler", RandomUnderSampler()), ("logistic", LogisticRegression())] ), - SMOTENC: dict(categorical_features=[0]), + # over-sampling + ADASYN: dict(random_state=42), + BorderlineSMOTE: dict(random_state=42), + KMeansSMOTE: dict(random_state=0), + RandomOverSampler: dict(random_state=42), + SMOTE: dict(random_state=42), + SMOTEN: dict(random_state=42), + SVMSMOTE: dict(random_state=42), + # under-sampling + ClusterCentroids: dict(random_state=42), + CondensedNearestNeighbour: dict(random_state=42), + InstanceHardnessThreshold: dict(random_state=42), + NearMiss: [dict(version=1), dict(version=2), dict(version=3)], + OneSidedSelection: dict(random_state=42), + RandomUnderSampler: dict(random_state=42), + # combination + SMOTEENN: dict(random_state=42), + SMOTETomek: dict(random_state=42), } # This dictionary stores parameters for specific checks. 
It also enables running the @@ -34,7 +71,7 @@ # TODO(devtools): allow third-party developers to pass test specific params to checks PER_ESTIMATOR_CHECK_PARAMS: dict = {} -SKIPPED_ESTIMATORS = [] +SKIPPED_ESTIMATORS = [SMOTENC] def _tested_estimators(type_filter=None): diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index 66e637763..8a3e761a7 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -647,11 +647,7 @@ def _check_X(X): ) if _is_pandas_df(X): return X - if sklearn_version >= parse_version("1.6"): - kwargs = {"ensure_all_finite": False} - else: - kwargs = {"force_all_finite": False} return check_array( - X, dtype=None, accept_sparse=["csr", "csc"], **kwargs + X, dtype=None, accept_sparse=["csr", "csc"], ensure_all_finite=False ) diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 83793a443..ffb5de129 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -598,7 +598,7 @@ def check_samplers_sample_indices(name, sampler_orig): sampler.fit_resample(X, y) tags = get_tags(sampler) if is_dataclass(tags): - sample_indices = getattr(tags, "sample_indices", None) + sample_indices = tags.sampler_tags.sample_indices else: # scikit-learn < 1.6 sample_indices = tags.get("sample_indices", None) diff --git a/imblearn/utils/fixes.py b/imblearn/utils/fixes.py index d960f5374..b5cec84d0 100644 --- a/imblearn/utils/fixes.py +++ b/imblearn/utils/fixes.py @@ -145,7 +145,9 @@ def _is_pandas_df(X): if sklearn_version < parse_version("1.6"): def validate_data(_estimator, **kwargs): - return _estimator._validate_data(**kwargs) + if "ensure_all_finite" in kwargs: + force_all_finite = kwargs.pop("ensure_all_finite") + return _estimator._validate_data(**kwargs, force_all_finite=force_all_finite) else: from sklearn.utils.validation import validate_data # type: ignore[no-redef] @@ -202,3 +204,10 @@ def check_version(estimator): return False return check_version + + +# TODO: Remove when python>=3.10 is the minimum supported version +def _dataclass_args(): + if sys.version_info < (3, 10): + return {} + return {"slots": True} diff --git a/imblearn/utils/tests/test_estimator_checks.py b/imblearn/utils/tests/test_estimator_checks.py index 0ebd495a1..71e44a896 100644 --- a/imblearn/utils/tests/test_estimator_checks.py +++ b/imblearn/utils/tests/test_estimator_checks.py @@ -79,7 +79,7 @@ def _check_X_y(self, X, y): y=y, reset=True, dtype=None, - force_all_finite=False, + ensure_all_finite=False, ) return X, y, binarize_y From 2453ca1f663e20823b27a1284599b2e35b9bfab1 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 11:57:38 +0100 Subject: [PATCH 09/20] iter --- imblearn/over_sampling/_random_over_sampler.py | 2 +- imblearn/over_sampling/_smote/base.py | 2 +- .../_prototype_selection/_random_under_sampler.py | 2 +- imblearn/utils/_validation.py | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py index 5495491d7..6c47c6131 100644 --- a/imblearn/over_sampling/_random_over_sampler.py +++ b/imblearn/over_sampling/_random_over_sampler.py @@ -157,7 +157,7 @@ def __init__( def _check_X_y(self, X, y): y, binarize_y = check_target_type(y, indicate_one_vs_all=True) - X = _check_X(X) + X = _check_X(self, X) _check_n_features(self, X, reset=True) _check_feature_names(self, X, reset=True) return X, y, binarize_y diff --git 
a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py index 261a58d87..e18a5f455 100644 --- a/imblearn/over_sampling/_smote/base.py +++ b/imblearn/over_sampling/_smote/base.py @@ -608,7 +608,7 @@ def _check_X_y(self, X, y): features. """ y, binarize_y = check_target_type(y, indicate_one_vs_all=True) - X = _check_X(X) + X = _check_X(self, X) _check_n_features(self, X, reset=True) _check_feature_names(self, X, reset=True) return X, y, binarize_y diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py index 1b0130eb4..b2493c857 100644 --- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py @@ -104,7 +104,7 @@ def __init__( def _check_X_y(self, X, y): y, binarize_y = check_target_type(y, indicate_one_vs_all=True) - X = _check_X(X) + X = _check_X(self, X) _check_n_features(self, X, reset=True) _check_feature_names(self, X, reset=True) return X, y, binarize_y diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index 8a3e761a7..bf1a896b1 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -14,12 +14,12 @@ import sklearn from sklearn.base import clone from sklearn.neighbors import NearestNeighbors -from sklearn.utils import check_array, column_or_1d +from sklearn.utils import column_or_1d from sklearn.utils.fixes import parse_version from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import _num_samples -from .fixes import _is_pandas_df +from .fixes import _is_pandas_df, validate_data SAMPLING_KIND = ( "over-sampling", @@ -638,7 +638,7 @@ def inner_f(*args, **kwargs): return inner_f -def _check_X(X): +def _check_X(estimator, X): """Check X and do not check it if a dataframe.""" n_samples = _num_samples(X) if n_samples < 1: @@ -648,6 +648,6 @@ def _check_X(X): if _is_pandas_df(X): return X - return check_array( - X, dtype=None, accept_sparse=["csr", "csc"], ensure_all_finite=False + return validate_data( + estimator, X, dtype=None, accept_sparse=["csr", "csc"], ensure_all_finite=False ) From ef735f484168e6e1c672528b0f6f2c25e6677498 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 12:02:24 +0100 Subject: [PATCH 10/20] iter --- imblearn/utils/fixes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/imblearn/utils/fixes.py b/imblearn/utils/fixes.py index b5cec84d0..69420fa7d 100644 --- a/imblearn/utils/fixes.py +++ b/imblearn/utils/fixes.py @@ -147,6 +147,8 @@ def _is_pandas_df(X): def validate_data(_estimator, **kwargs): if "ensure_all_finite" in kwargs: force_all_finite = kwargs.pop("ensure_all_finite") + else: + force_all_finite = True return _estimator._validate_data(**kwargs, force_all_finite=force_all_finite) else: from sklearn.utils.validation import validate_data # type: ignore[no-redef] From 1629b06bb32fa1394b85c3e56ece5be40afe80ea Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 17:35:12 +0100 Subject: [PATCH 11/20] iter --- imblearn/ensemble/_easy_ensemble.py | 15 +++++++--- imblearn/ensemble/_forest.py | 28 +++++++++++++------ imblearn/ensemble/_weight_boosting.py | 27 ++++++++++++------ .../utils/_test_common/instance_generator.py | 13 ++++++++- 4 files changed, 61 insertions(+), 22 deletions(-) diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index 35a7df2cd..1bcb2c1dd 100644 --- 
a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -346,12 +346,19 @@ def base_estimator_(self): def _get_estimator(self): if self.estimator is None: - return AdaBoostClassifier(algorithm="SAMME") + if parse_version("1.4") <= sklearn_version < parse_version("1.6"): + return AdaBoostClassifier(algorithm="SAMME") + else: + return AdaBoostClassifier() return self.estimator # TODO: remove when minimum supported version of scikit-learn is 1.5 @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): - # This code should not be called for scikit-learn >= 1.6 - # Therefore, get_tags corresponds to _safe_tags that returns a dict - return {"allow_nan": get_tags(self._get_estimator(), "allow_nan")} + return {"allow_nan": get_tags(self._get_estimator())["allow_nan"]} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = get_tags(self._get_estimator()).input_tags.allow_nan + return tags diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py index 6b0fb686e..386293185 100644 --- a/imblearn/ensemble/_forest.py +++ b/imblearn/ensemble/_forest.py @@ -5,6 +5,7 @@ import numbers from copy import deepcopy +from dataclasses import is_dataclass from warnings import warn import numpy as np @@ -36,7 +37,7 @@ from ..utils._docstring import _n_jobs_docstring, _random_state_docstring from ..utils._param_validation import Hidden, Interval, StrOptions from ..utils._validation import check_sampling_strategy -from ..utils.fixes import _fit_context, check_version_package, validate_data +from ..utils.fixes import _fit_context, check_version_package, get_tags, validate_data from ._common import _random_forest_classifier_parameter_constraints MAX_INT = np.iinfo(np.int32).max @@ -78,7 +79,7 @@ def _local_parallel_build_trees( "bootstrap": bootstrap, } - if parse_version(sklearn_version.base_version) >= parse_version("1.4"): + if sklearn_version >= parse_version("1.4"): # TODO: remove when the minimum supported version of scikit-learn will be 1.4 # support for missing values params_parallel_build_trees["missing_values_in_feature_mask"] = ( @@ -475,7 +476,7 @@ def __init__( "max_samples": max_samples, } # TODO: remove when the minimum supported version of scikit-learn will be 1.4 - if parse_version(sklearn_version.base_version) >= parse_version("1.4"): + if sklearn_version >= parse_version("1.4"): # use scikit-learn support for monotonic constraints params_random_forest["monotonic_cst"] = monotonic_cst else: @@ -595,12 +596,12 @@ def fit(self, X, y, sample_weight=None): if issparse(y): raise ValueError("sparse multilabel-indicator for y is not supported.") - # TODO: remove when the minimum supported version of scipy will be 1.4 - # Support for missing values - if parse_version(sklearn_version.base_version) >= parse_version("1.4"): - ensure_all_finite = False + # TODO (1.6): simplify because we will only have dataclass tags + tags = get_tags(self) + if is_dataclass(tags): + ensure_all_finite = not tags.input_tags.allow_nan else: - ensure_all_finite = False + ensure_all_finite = not tags.get("allow_nan", False) X, y = validate_data( self, @@ -884,4 +885,13 @@ def _compute_oob_predictions(self, X, y): @available_if(check_version_package("sklearn", "<", "1.6")) def _more_tags(self): - return {"multioutput": False, "multilabel": False} + allow_nan = sklearn_version >= parse_version("1.4") + return {"multioutput": False, "multilabel": False, "allow_nan": 
allow_nan} + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.multi_output = False + tags.classifier_tags.multi_label = False + tags.input_tags.allow_nan = sklearn_version >= parse_version("1.4") + return tags diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py index 9da02255e..fbb77142e 100644 --- a/imblearn/ensemble/_weight_boosting.py +++ b/imblearn/ensemble/_weight_boosting.py @@ -10,6 +10,7 @@ from sklearn.tree import DecisionTreeClassifier from sklearn.utils import _safe_indexing from sklearn.utils.fixes import parse_version +from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import has_fit_parameter from ..base import _ParamsValidationMixin @@ -18,8 +19,8 @@ from ..under_sampling.base import BaseUnderSampler from ..utils import Substitution, check_target_type from ..utils._docstring import _random_state_docstring -from ..utils._param_validation import Interval, StrOptions -from ..utils.fixes import _fit_context +from ..utils._param_validation import Hidden, Interval, StrOptions +from ..utils.fixes import _fit_context, check_version_package from ._common import _adaboost_classifier_parameter_constraints sklearn_version = parse_version(sklearn.__version__) @@ -58,7 +59,7 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): ``learning_rate``. There is a trade-off between ``learning_rate`` and ``n_estimators``. - algorithm : {{'SAMME', 'SAMME.R'}}, default='SAMME.R' + algorithm : {{'SAMME', 'SAMME.R'}}, default='deprecated' If 'SAMME.R' then use the SAMME.R real boosting algorithm. ``base_estimator`` must support calculation of class probabilities. If 'SAMME' then use the SAMME discrete boosting algorithm. @@ -66,8 +67,10 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): achieving a lower test error with fewer boosting iterations. .. deprecated:: 0.12 - `"SAMME.R"` is deprecated and will be removed in version 0.14. - '"SAMME"' will become the default. + `algorithm` is deprecated in 0.12 and will be removed in 0.14. + Depending on the `scikit-learn` version, the "SAMME.R" algorithm might not + be available. Refer to the documentation of + :class:`~sklearn.ensemble.AdaBoostClassifier` for more information. {sampling_strategy} @@ -109,7 +112,7 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): ensemble. feature_importances_ : ndarray of shape (n_features,) - The feature importances if supported by the ``base_estimator``. + The feature importances if supported by the ``estimator``. n_features_in_ : int Number of features in the input dataset. 
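# A minimal sketch (not part of the patch series) of the version-gated tag
# access pattern the hunks above rely on: with scikit-learn < 1.6 the tags are
# a plain dict coming from `_more_tags`/`_safe_tags`, while from 1.6 on they
# are dataclasses returned by `__sklearn_tags__`. It assumes
# `imblearn.utils.fixes.get_tags` normalizes both code paths, as the forest
# `fit` hunk above does; the helper name is illustrative.
from dataclasses import is_dataclass

from imblearn.utils.fixes import get_tags


def estimator_allows_nan(estimator):
    """Read `allow_nan` regardless of the installed scikit-learn version."""
    tags = get_tags(estimator)
    if is_dataclass(tags):
        # scikit-learn >= 1.6: dataclass-based tags
        return tags.input_tags.allow_nan
    # scikit-learn < 1.6: dict-based tags
    return tags.get("allow_nan", False)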
@@ -167,6 +170,10 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): _parameter_constraints.update( { + "algorithm": [ + StrOptions({"SAMME", "SAMME.R"}), + Hidden(StrOptions({"deprecated"})), + ], "sampling_strategy": [ Interval(numbers.Real, 0, 1, closed="right"), StrOptions({"auto", "majority", "not minority", "not majority", "all"}), @@ -186,7 +193,7 @@ def __init__( *, n_estimators=50, learning_rate=1.0, - algorithm="SAMME.R", + algorithm="deprecated", sampling_strategy="auto", replacement=False, random_state=None, @@ -194,9 +201,9 @@ def __init__( super().__init__( n_estimators=n_estimators, learning_rate=learning_rate, - algorithm=algorithm, random_state=random_state, ) + self.algorithm = algorithm self.estimator = estimator self.sampling_strategy = sampling_strategy self.replacement = replacement @@ -394,3 +401,7 @@ def _boost_discrete(self, iboost, X, y, sample_weight, random_state): sample_weight *= np.exp(estimator_weight * incorrect * (sample_weight > 0)) return sample_weight, estimator_weight, estimator_error + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def _boost(self, iboost, X, y, sample_weight, random_state): + return self._boost_discrete(iboost, X, y, sample_weight, random_state) \ No newline at end of file diff --git a/imblearn/utils/_test_common/instance_generator.py b/imblearn/utils/_test_common/instance_generator.py index 0d7b8fbea..f83111854 100644 --- a/imblearn/utils/_test_common/instance_generator.py +++ b/imblearn/utils/_test_common/instance_generator.py @@ -10,11 +10,16 @@ from sklearn import clone, config_context from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier from sklearn.exceptions import SkipTestWarning from sklearn.utils._testing import SkipTest from imblearn.combine import SMOTEENN, SMOTETomek -from imblearn.ensemble import BalancedBaggingClassifier, BalancedRandomForestClassifier +from imblearn.ensemble import ( + BalancedBaggingClassifier, + BalancedRandomForestClassifier, + EasyEnsembleClassifier, +) from imblearn.over_sampling import ( ADASYN, BorderlineSMOTE, @@ -42,6 +47,12 @@ # estimator BalancedBaggingClassifier: dict(random_state=42), BalancedRandomForestClassifier: dict(random_state=42), + EasyEnsembleClassifier: [ + # AdaBoostClassifier does not allow nan values + dict(random_state=42), + # DecisionTreeClassifier allows nan values + dict(estimator=DecisionTreeClassifier(random_state=42), random_state=42), + ], Pipeline: dict( steps=[("sampler", RandomUnderSampler()), ("logistic", LogisticRegression())] ), From 7c91d5dd255b4029ec60f4a182bbb3a14ee04205 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 17:42:05 +0100 Subject: [PATCH 12/20] iter --- .../plot_comparison_ensemble_classifier.py | 2 +- imblearn/ensemble/tests/test_easy_ensemble.py | 17 ++++++----------- imblearn/utils/_validation.py | 6 +++++- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/examples/ensemble/plot_comparison_ensemble_classifier.py b/examples/ensemble/plot_comparison_ensemble_classifier.py index 8c318e5bc..602e477e5 100644 --- a/examples/ensemble/plot_comparison_ensemble_classifier.py +++ b/examples/ensemble/plot_comparison_ensemble_classifier.py @@ -197,7 +197,7 @@ from imblearn.ensemble import EasyEnsembleClassifier, RUSBoostClassifier -estimator = AdaBoostClassifier(n_estimators=10, algorithm="SAMME") +estimator = AdaBoostClassifier(n_estimators=10) eec = EasyEnsembleClassifier(n_estimators=10, estimator=estimator) eec.fit(X_train, 
y_train) y_pred_eec = eec.predict(X_test) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index 6fe50500f..5d3b23fd2 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -37,13 +37,10 @@ Y = np.array([1, 2, 2, 2, 1, 0, 1, 1, 1, 0]) -@pytest.mark.parametrize("n_estimators", [10, 20]) +@pytest.mark.parametrize("n_estimators", [5, 10]) @pytest.mark.parametrize( "estimator", - [ - AdaBoostClassifier(algorithm="SAMME", n_estimators=5), - AdaBoostClassifier(algorithm="SAMME", n_estimators=10), - ], + [AdaBoostClassifier(n_estimators=5), AdaBoostClassifier(n_estimators=10)], ) def test_easy_ensemble_classifier(n_estimators, estimator): # Check classification for various parameter settings. @@ -89,7 +86,7 @@ def test_estimator(): assert isinstance(ensemble.estimator_.steps[-1][1], AdaBoostClassifier) ensemble = EasyEnsembleClassifier( - 2, AdaBoostClassifier(algorithm="SAMME"), n_jobs=-1, random_state=0 + 2, AdaBoostClassifier(), n_jobs=-1, random_state=0 ).fit(X_train, y_train) assert isinstance(ensemble.estimator_.steps[-1][1], AdaBoostClassifier) @@ -104,9 +101,7 @@ def test_bagging_with_pipeline(): ) estimator = EasyEnsembleClassifier( n_estimators=2, - estimator=make_pipeline( - SelectKBest(k=1), AdaBoostClassifier(algorithm="SAMME") - ), + estimator=make_pipeline(SelectKBest(k=1), AdaBoostClassifier()), ) estimator.fit(X, y).predict(X) @@ -198,7 +193,7 @@ def test_easy_ensemble_classifier_single_estimator(): clf1 = EasyEnsembleClassifier(n_estimators=1, random_state=0).fit(X_train, y_train) clf2 = make_pipeline( RandomUnderSampler(random_state=0), - AdaBoostClassifier(algorithm="SAMME", random_state=0), + AdaBoostClassifier(random_state=0), ).fit(X_train, y_train) assert_array_equal(clf1.predict(X_test), clf2.predict(X_test)) @@ -217,7 +212,7 @@ def test_easy_ensemble_classifier_grid_search(): "estimator__n_estimators": [3, 4], } grid_search = GridSearchCV( - EasyEnsembleClassifier(estimator=AdaBoostClassifier(algorithm="SAMME")), + EasyEnsembleClassifier(estimator=AdaBoostClassifier()), parameters, cv=5, ) diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index bf1a896b1..8a9745c37 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -649,5 +649,9 @@ def _check_X(estimator, X): return X return validate_data( - estimator, X, dtype=None, accept_sparse=["csr", "csc"], ensure_all_finite=False + estimator, + X=X, + dtype=None, + accept_sparse=["csr", "csc"], + ensure_all_finite=False, ) From 5d12d07717dfe41da839562b595f6b8a0355f4a1 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 17:50:33 +0100 Subject: [PATCH 13/20] more fix --- imblearn/metrics/pairwise.py | 6 ++++++ imblearn/tests/test_docstring_parameters.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py index 12e221935..ff6fa39b5 100644 --- a/imblearn/metrics/pairwise.py +++ b/imblearn/metrics/pairwise.py @@ -235,3 +235,9 @@ def _more_tags(self): return { "requires_positive_X": True, # X should be encoded with OrdinalEncoder } + + @available_if(check_version_package("sklearn", ">=", "1.6")) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.positive_only = True # X should be encoded with OrdinalEncoder + return tags diff --git a/imblearn/tests/test_docstring_parameters.py b/imblearn/tests/test_docstring_parameters.py index 
03b66b3cb..5cf5a6c2e 100644 --- a/imblearn/tests/test_docstring_parameters.py +++ b/imblearn/tests/test_docstring_parameters.py @@ -18,7 +18,6 @@ ) from sklearn.utils.deprecation import _is_deprecated from sklearn.utils.estimator_checks import ( - _construct_instance, _enforce_estimator_tags_X, _enforce_estimator_tags_y, ) @@ -27,6 +26,7 @@ from imblearn.base import is_sampler from imblearn.utils.estimator_checks import _set_checking_parameters from imblearn.utils.testing import all_estimators +from imblearn.utils._test_common.instance_generator import _construct_instances # walk_packages() ignores DeprecationWarnings, now we need to ignore # FutureWarnings @@ -179,7 +179,7 @@ def test_fit_docstring_attributes(name, Estimator): if Estimator.__name__ == "Pipeline": est = _construct_compose_pipeline_instance(Estimator) else: - est = _construct_instance(Estimator) + est = next(_construct_instances(Estimator)) _set_checking_parameters(est) X, y = make_classification( From e74293a91ce92a206abb5615dd4e8ec8f01eb15b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 17:55:46 +0100 Subject: [PATCH 14/20] iter --- imblearn/ensemble/_weight_boosting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py index fbb77142e..8c0d41af2 100644 --- a/imblearn/ensemble/_weight_boosting.py +++ b/imblearn/ensemble/_weight_boosting.py @@ -402,6 +402,5 @@ def _boost_discrete(self, iboost, X, y, sample_weight, random_state): return sample_weight, estimator_weight, estimator_error - @available_if(check_version_package("sklearn", ">=", "1.6")) def _boost(self, iboost, X, y, sample_weight, random_state): return self._boost_discrete(iboost, X, y, sample_weight, random_state) \ No newline at end of file From a33b9f863f14ace8361c57c8202249f179b149b8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 17:57:46 +0100 Subject: [PATCH 15/20] iter --- imblearn/ensemble/_weight_boosting.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py index 8c0d41af2..82c009d0a 100644 --- a/imblearn/ensemble/_weight_boosting.py +++ b/imblearn/ensemble/_weight_boosting.py @@ -403,4 +403,8 @@ def _boost_discrete(self, iboost, X, y, sample_weight, random_state): return sample_weight, estimator_weight, estimator_error def _boost(self, iboost, X, y, sample_weight, random_state): - return self._boost_discrete(iboost, X, y, sample_weight, random_state) \ No newline at end of file + if self.algorithm == "SAMME.R": + return self._boost_real(iboost, X, y, sample_weight, random_state) + + else: # elif self.algorithm == "SAMME": + return self._boost_discrete(iboost, X, y, sample_weight, random_state) \ No newline at end of file From 7878bb29b7fa0c6a13552d82ed3752ccb8626f48 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 18:04:58 +0100 Subject: [PATCH 16/20] iter --- imblearn/ensemble/tests/test_easy_ensemble.py | 7 ++----- imblearn/pipeline.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index 5d3b23fd2..472182060 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -1,4 +1,5 @@ """Test the module easy ensemble.""" + # Authors: Guillaume Lemaitre # Christos Aridas # License: MIT @@ -211,9 +212,5 @@ def 
test_easy_ensemble_classifier_grid_search(): "n_estimators": [1, 2], "estimator__n_estimators": [3, 4], } - grid_search = GridSearchCV( - EasyEnsembleClassifier(estimator=AdaBoostClassifier()), - parameters, - cv=5, - ) + grid_search = GridSearchCV(EasyEnsembleClassifier(), parameters, cv=5) grid_search.fit(X, y) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index 7453446ad..654d1a5a2 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -1059,7 +1059,7 @@ def _fit_resample_one(sampler, X, y, message_clsname="", message=None, params=No return X_res, y_res, sampler -def _transform_one(transformer, X, y, weight, params): +def _transform_one(transformer, X, y, weight, params=None): """Call transform and apply weight to output. Parameters From 8903b00d526cea2b756db40bcf825920a9323c56 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 12 Nov 2024 18:12:01 +0100 Subject: [PATCH 17/20] iter --- imblearn/utils/_tags.py | 131 ++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 66 deletions(-) diff --git a/imblearn/utils/_tags.py b/imblearn/utils/_tags.py index 7d59e73a8..84b5de60c 100644 --- a/imblearn/utils/_tags.py +++ b/imblearn/utils/_tags.py @@ -32,70 +32,69 @@ class SamplerTags: sample_indices: bool = False + @dataclass(**_dataclass_args()) + class Tags: + """Tags for the estimator. + + See :ref:`estimator_tags` for more information. + + Parameters + ---------- + estimator_type : str or None + The type of the estimator. Can be one of: + - "classifier" + - "regressor" + - "transformer" + - "clusterer" + - "outlier_detector" + - "density_estimator" + + target_tags : :class:`TargetTags` + The target(y) tags. + + transformer_tags : :class:`TransformerTags` or None + The transformer tags. + + classifier_tags : :class:`ClassifierTags` or None + The classifier tags. + + regressor_tags : :class:`RegressorTags` or None + The regressor tags. + + sampler_tags : :class:`SamplerTags` or None + The sampler tags. + + array_api_support : bool, default=False + Whether the estimator supports Array API compatible inputs. + + no_validation : bool, default=False + Whether the estimator skips input-validation. This is only meant for + stateless and dummy transformers! + + non_deterministic : bool, default=False + Whether the estimator is not deterministic given a fixed ``random_state``. + + requires_fit : bool, default=True + Whether the estimator requires to be fitted before calling one of + `transform`, `predict`, `predict_proba`, or `decision_function`. + + _skip_test : bool, default=False + Whether to skip common tests entirely. Don't use this unless + you have a *very good* reason. + + input_tags : :class:`InputTags` + The input data(X) tags. + """ -@dataclass(**_dataclass_args()) -class Tags: - """Tags for the estimator. - - See :ref:`estimator_tags` for more information. - - Parameters - ---------- - estimator_type : str or None - The type of the estimator. Can be one of: - - "classifier" - - "regressor" - - "transformer" - - "clusterer" - - "outlier_detector" - - "density_estimator" - - target_tags : :class:`TargetTags` - The target(y) tags. - - transformer_tags : :class:`TransformerTags` or None - The transformer tags. - - classifier_tags : :class:`ClassifierTags` or None - The classifier tags. - - regressor_tags : :class:`RegressorTags` or None - The regressor tags. - - sampler_tags : :class:`SamplerTags` or None - The sampler tags. - - array_api_support : bool, default=False - Whether the estimator supports Array API compatible inputs. 
- - no_validation : bool, default=False - Whether the estimator skips input-validation. This is only meant for - stateless and dummy transformers! - - non_deterministic : bool, default=False - Whether the estimator is not deterministic given a fixed ``random_state``. - - requires_fit : bool, default=True - Whether the estimator requires to be fitted before calling one of - `transform`, `predict`, `predict_proba`, or `decision_function`. - - _skip_test : bool, default=False - Whether to skip common tests entirely. Don't use this unless - you have a *very good* reason. - - input_tags : :class:`InputTags` - The input data(X) tags. - """ - - estimator_type: str | None - target_tags: TargetTags - transformer_tags: TransformerTags | None - classifier_tags: ClassifierTags | None - regressor_tags: RegressorTags | None - sampler_tags: SamplerTags | None - array_api_support: bool = False - no_validation: bool = False - non_deterministic: bool = False - requires_fit: bool = True - _skip_test: bool = False - input_tags: InputTags = field(default_factory=InputTags) + estimator_type: str | None + target_tags: TargetTags + transformer_tags: TransformerTags | None + classifier_tags: ClassifierTags | None + regressor_tags: RegressorTags | None + sampler_tags: SamplerTags | None + array_api_support: bool = False + no_validation: bool = False + non_deterministic: bool = False + requires_fit: bool = True + _skip_test: bool = False + input_tags: InputTags = field(default_factory=InputTags) From 468f92564b47fced5bfc4008096b0955a4e65763 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 13 Nov 2024 22:40:14 +0100 Subject: [PATCH 18/20] iter --- imblearn/ensemble/_weight_boosting.py | 10 +++++++--- imblearn/ensemble/tests/test_weight_boosting.py | 14 ++++++++++++-- imblearn/pipeline.py | 4 ++++ imblearn/utils/_tags.py | 16 +++------------- .../utils/_test_common/instance_generator.py | 5 ++++- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py index 82c009d0a..0caaae27e 100644 --- a/imblearn/ensemble/_weight_boosting.py +++ b/imblearn/ensemble/_weight_boosting.py @@ -1,5 +1,6 @@ import copy import numbers +import warnings from copy import deepcopy import numpy as np @@ -68,9 +69,6 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): .. deprecated:: 0.12 `algorithm` is deprecated in 0.12 and will be removed in 0.14. - Depending on the `scikit-learn` version, the "SAMME.R" algorithm might not - be available. Refer to the documentation of - :class:`~sklearn.ensemble.AdaBoostClassifier` for more information. {sampling_strategy} @@ -403,6 +401,12 @@ def _boost_discrete(self, iboost, X, y, sample_weight, random_state): return sample_weight, estimator_weight, estimator_error def _boost(self, iboost, X, y, sample_weight, random_state): + if self.algorithm != "deprecated": + warnings.warn( + "`algorithm` parameter is deprecated in 0.12 and will be removed in " + "0.14. 
In the future, the SAMME algorithm will always be used.", + FutureWarning, + ) if self.algorithm == "SAMME.R": return self._boost_real(iboost, X, y, sample_weight, random_state) diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index 8096a2b16..89589d248 100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -24,7 +24,7 @@ def imbalanced_dataset(): @pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) -@pytest.mark.filterwarnings("ignore:The SAMME.R algorithm (the default) is") +@pytest.mark.filterwarnings("ignore:`algorithm` parameter is deprecated in 0.12") def test_rusboost(imbalanced_dataset, algorithm): X, y = imbalanced_dataset X_train, X_test, y_train, y_test = train_test_split( @@ -70,7 +70,7 @@ def test_rusboost(imbalanced_dataset, algorithm): @pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) -@pytest.mark.filterwarnings("ignore:The SAMME.R algorithm (the default) is") +@pytest.mark.filterwarnings("ignore:`algorithm` parameter is deprecated in 0.12") def test_rusboost_sample_weight(imbalanced_dataset, algorithm): X, y = imbalanced_dataset sample_weight = np.ones_like(y) @@ -88,3 +88,13 @@ def test_rusboost_sample_weight(imbalanced_dataset, algorithm): with pytest.raises(AssertionError): assert_array_equal(y_pred_no_sample_weight, y_pred_sample_weight) + + +@pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) +def test_rusboost_algorithm_future_warning(imbalanced_dataset, algorithm): + X, y = imbalanced_dataset + rusboost = RUSBoostClassifier(algorithm=algorithm, random_state=0) + + warning_msg = "`algorithm` parameter is deprecated in 0.12" + with pytest.warns(FutureWarning, match=warning_msg): + rusboost.fit(X, y) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index 654d1a5a2..c97d94995 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -95,6 +95,10 @@ class Pipeline(_ParamsValidationMixin, pipeline.Pipeline): n_features_in_ : int Number of features seen during first step `fit` method. + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + See Also -------- make_pipeline : Helper function to make pipeline. diff --git a/imblearn/utils/_tags.py b/imblearn/utils/_tags.py index 84b5de60c..9b73eeb70 100644 --- a/imblearn/utils/_tags.py +++ b/imblearn/utils/_tags.py @@ -8,6 +8,7 @@ if sklearn_version >= parse_version("1.6"): from sklearn.utils._tags import ( + Tags, TargetTags, TransformerTags, ClassifierTags, @@ -33,7 +34,7 @@ class SamplerTags: sample_indices: bool = False @dataclass(**_dataclass_args()) - class Tags: + class Tags(Tags): """Tags for the estimator. See :ref:`estimator_tags` for more information. @@ -86,15 +87,4 @@ class Tags: The input data(X) tags. 
""" - estimator_type: str | None - target_tags: TargetTags - transformer_tags: TransformerTags | None - classifier_tags: ClassifierTags | None - regressor_tags: RegressorTags | None - sampler_tags: SamplerTags | None - array_api_support: bool = False - no_validation: bool = False - non_deterministic: bool = False - requires_fit: bool = True - _skip_test: bool = False - input_tags: InputTags = field(default_factory=InputTags) + sampler_tags: SamplerTags | None = None diff --git a/imblearn/utils/_test_common/instance_generator.py b/imblearn/utils/_test_common/instance_generator.py index f83111854..64ee971e2 100644 --- a/imblearn/utils/_test_common/instance_generator.py +++ b/imblearn/utils/_test_common/instance_generator.py @@ -54,7 +54,10 @@ dict(estimator=DecisionTreeClassifier(random_state=42), random_state=42), ], Pipeline: dict( - steps=[("sampler", RandomUnderSampler()), ("logistic", LogisticRegression())] + steps=[ + ("sampler", RandomUnderSampler(random_state=0)), + ("logistic", LogisticRegression()), + ] ), # over-sampling ADASYN: dict(random_state=42), From c457b4ab8167c6dac440eb65c8557b390f9f754d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 13 Nov 2024 23:08:15 +0100 Subject: [PATCH 19/20] iter --- imblearn/pipeline.py | 228 ++++++++++++++++++++------------ imblearn/tests/test_pipeline.py | 9 +- 2 files changed, 148 insertions(+), 89 deletions(-) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index c97d94995..cdb3d0ba5 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -2,6 +2,7 @@ The :mod:`imblearn.pipeline` module implements utilities to build a composite estimator, as a chain of transforms, samples and estimators. """ + # Adapted from scikit-learn # Author: Edouard Duchesnay @@ -12,13 +13,18 @@ # Christos Aridas # Guillaume Lemaitre # License: BSD +import warnings +from contextlib import contextmanager +from copy import deepcopy + import sklearn from sklearn import pipeline from sklearn.base import clone +from sklearn.exceptions import NotFittedError from sklearn.utils import Bunch from sklearn.utils.fixes import parse_version from sklearn.utils.metaestimators import available_if -from sklearn.utils.validation import check_memory +from sklearn.utils.validation import check_memory, check_is_fitted from .base import _ParamsValidationMixin from .utils._metadata_requests import ( @@ -30,7 +36,7 @@ process_routing, ) from .utils._param_validation import HasMethods, validate_params -from .utils.fixes import _fit_context +from .utils.fixes import _fit_context, get_tags METHODS.append("fit_resample") @@ -43,6 +49,31 @@ from sklearn.utils._user_interface import _print_elapsed_time +@contextmanager +def _raise_or_warn_if_not_fitted(estimator): + """A context manager to make sure a NotFittedError is raised, if a sub-estimator + raises the error. + Otherwise, we raise a warning if the pipeline is not fitted, with the deprecation. + TODO(1.8): remove this context manager and replace with check_is_fitted. + """ + try: + yield + except NotFittedError as exc: + raise NotFittedError("Pipeline is not fitted yet.") from exc + + # we only get here if the above didn't raise + try: + check_is_fitted(estimator) + except NotFittedError: + warnings.warn( + "This Pipeline instance is not fitted yet. Call 'fit' with " + "appropriate arguments before using other methods such as transform, " + "predict, etc. 
This will raise an error in 1.8 instead of the current " + "warning.", + FutureWarning, + ) + + class Pipeline(_ParamsValidationMixin, pipeline.Pipeline): """Pipeline of transforms and resamples with a final estimator. @@ -456,18 +487,22 @@ def predict(self, X, **params): y_pred : ndarray Result of calling `predict` on the final estimator. """ - Xt = X + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + Xt = X - if not _routing_enabled(): - for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt) - return self.steps[-1][1].predict(Xt, **params) + if not _routing_enabled(): + for _, name, transform in self._iter(with_final=False): + Xt = transform.transform(Xt) + return self.steps[-1][1].predict(Xt, **params) - # metadata routing enabled - routed_params = process_routing(self, "predict", **params) - for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt, **routed_params[name].transform) - return self.steps[-1][1].predict(Xt, **routed_params[self.steps[-1][0]].predict) + # metadata routing enabled + routed_params = process_routing(self, "predict", **params) + for _, name, transform in self._iter(with_final=False): + Xt = transform.transform(Xt, **routed_params[name].transform) + return self.steps[-1][1].predict( + Xt, **routed_params[self.steps[-1][0]].predict + ) def _can_fit_resample(self): return self._final_estimator == "passthrough" or hasattr( @@ -646,20 +681,22 @@ def predict_proba(self, X, **params): y_proba : ndarray of shape (n_samples, n_classes) Result of calling `predict_proba` on the final estimator. """ - Xt = X + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + Xt = X + + if not _routing_enabled(): + for _, name, transform in self._iter(with_final=False): + Xt = transform.transform(Xt) + return self.steps[-1][1].predict_proba(Xt, **params) - if not _routing_enabled(): + # metadata routing enabled + routed_params = process_routing(self, "predict_proba", **params) for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt) - return self.steps[-1][1].predict_proba(Xt, **params) - - # metadata routing enabled - routed_params = process_routing(self, "predict_proba", **params) - for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt, **routed_params[name].transform) - return self.steps[-1][1].predict_proba( - Xt, **routed_params[self.steps[-1][0]].predict_proba - ) + Xt = transform.transform(Xt, **routed_params[name].transform) + return self.steps[-1][1].predict_proba( + Xt, **routed_params[self.steps[-1][0]].predict_proba + ) @available_if(pipeline._final_estimator_has("decision_function")) def decision_function(self, X, **params): @@ -691,20 +728,23 @@ def decision_function(self, X, **params): y_score : ndarray of shape (n_samples, n_classes) Result of calling `decision_function` on the final estimator. 
""" - _raise_for_params(params, self, "decision_function") + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + _raise_for_params(params, self, "decision_function") - # not branching here since params is only available if - # enable_metadata_routing=True - routed_params = process_routing(self, "decision_function", **params) + # not branching here since params is only available if + # enable_metadata_routing=True + routed_params = process_routing(self, "decision_function", **params) - Xt = X - for _, name, transform in self._iter(with_final=False): - Xt = transform.transform( - Xt, **routed_params.get(name, {}).get("transform", {}) + Xt = X + for _, name, transform in self._iter(with_final=False): + Xt = transform.transform( + Xt, **routed_params.get(name, {}).get("transform", {}) + ) + return self.steps[-1][1].decision_function( + Xt, + **routed_params.get(self.steps[-1][0], {}).get("decision_function", {}), ) - return self.steps[-1][1].decision_function( - Xt, **routed_params.get(self.steps[-1][0], {}).get("decision_function", {}) - ) @available_if(pipeline._final_estimator_has("score_samples")) def score_samples(self, X): @@ -726,10 +766,12 @@ def score_samples(self, X): y_score : ndarray of shape (n_samples,) Result of calling `score_samples` on the final estimator. """ - Xt = X - for _, _, transformer in self._iter(with_final=False): - Xt = transformer.transform(Xt) - return self.steps[-1][1].score_samples(Xt) + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + Xt = X + for _, _, transformer in self._iter(with_final=False): + Xt = transformer.transform(Xt) + return self.steps[-1][1].score_samples(Xt) @available_if(pipeline._final_estimator_has("predict_log_proba")) def predict_log_proba(self, X, **params): @@ -773,20 +815,22 @@ def predict_log_proba(self, X, **params): y_log_proba : ndarray of shape (n_samples, n_classes) Result of calling `predict_log_proba` on the final estimator. """ - Xt = X + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + Xt = X - if not _routing_enabled(): + if not _routing_enabled(): + for _, name, transform in self._iter(with_final=False): + Xt = transform.transform(Xt) + return self.steps[-1][1].predict_log_proba(Xt, **params) + + # metadata routing enabled + routed_params = process_routing(self, "predict_log_proba", **params) for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt) - return self.steps[-1][1].predict_log_proba(Xt, **params) - - # metadata routing enabled - routed_params = process_routing(self, "predict_log_proba", **params) - for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt, **routed_params[name].transform) - return self.steps[-1][1].predict_log_proba( - Xt, **routed_params[self.steps[-1][0]].predict_log_proba - ) + Xt = transform.transform(Xt, **routed_params[name].transform) + return self.steps[-1][1].predict_log_proba( + Xt, **routed_params[self.steps[-1][0]].predict_log_proba + ) def _can_transform(self): return self._final_estimator == "passthrough" or hasattr( @@ -826,15 +870,17 @@ def transform(self, X, **params): Xt : ndarray of shape (n_samples, n_transformed_features) Transformed data. 
""" - _raise_for_params(params, self, "transform") - - # not branching here since params is only available if - # enable_metadata_routing=True - routed_params = process_routing(self, "transform", **params) - Xt = X - for _, name, transform in self._iter(): - Xt = transform.transform(Xt, **routed_params[name].transform) - return Xt + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + _raise_for_params(params, self, "transform") + + # not branching here since params is only available if + # enable_metadata_routing=True + routed_params = process_routing(self, "transform", **params) + Xt = X + for _, name, transform in self._iter(): + Xt = transform.transform(Xt, **routed_params[name].transform) + return Xt def _can_inverse_transform(self): return all(hasattr(t, "inverse_transform") for _, _, t in self._iter()) @@ -869,17 +915,19 @@ def inverse_transform(self, Xt, **params): Inverse transformed data, that is, data in the original feature space. """ - _raise_for_params(params, self, "inverse_transform") - - # we don't have to branch here, since params is only non-empty if - # enable_metadata_routing=True. - routed_params = process_routing(self, "inverse_transform", **params) - reverse_iter = reversed(list(self._iter())) - for _, name, transform in reverse_iter: - Xt = transform.inverse_transform( - Xt, **routed_params[name].inverse_transform - ) - return Xt + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + _raise_for_params(params, self, "inverse_transform") + + # we don't have to branch here, since params is only non-empty if + # enable_metadata_routing=True. + routed_params = process_routing(self, "inverse_transform", **params) + reverse_iter = reversed(list(self._iter())) + for _, name, transform in reverse_iter: + Xt = transform.inverse_transform( + Xt, **routed_params[name].inverse_transform + ) + return Xt @available_if(pipeline._final_estimator_has("score")) def score(self, X, y=None, sample_weight=None, **params): @@ -918,24 +966,28 @@ def score(self, X, y=None, sample_weight=None, **params): score : float Result of calling `score` on the final estimator. """ - Xt = X - if not _routing_enabled(): - for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt) - score_params = {} - if sample_weight is not None: - score_params["sample_weight"] = sample_weight - return self.steps[-1][1].score(Xt, y, **score_params) - - # metadata routing is enabled. - routed_params = process_routing( - self, "score", sample_weight=sample_weight, **params - ) + # TODO(1.8): Remove the context manager and use check_is_fitted(self) + with _raise_or_warn_if_not_fitted(self): + Xt = X + if not _routing_enabled(): + for _, name, transform in self._iter(with_final=False): + Xt = transform.transform(Xt) + score_params = {} + if sample_weight is not None: + score_params["sample_weight"] = sample_weight + return self.steps[-1][1].score(Xt, y, **score_params) + + # metadata routing is enabled. 
+ routed_params = process_routing( + self, "score", sample_weight=sample_weight, **params + ) - Xt = X - for _, name, transform in self._iter(with_final=False): - Xt = transform.transform(Xt, **routed_params[name].transform) - return self.steps[-1][1].score(Xt, y, **routed_params[self.steps[-1][0]].score) + Xt = X + for _, name, transform in self._iter(with_final=False): + Xt = transform.transform(Xt, **routed_params[name].transform) + return self.steps[-1][1].score( + Xt, y, **routed_params[self.steps[-1][0]].score + ) # TODO: once scikit-learn >= 1.4, the following function should be simplified by # calling `super().get_metadata_routing()` diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py index ed90b263c..bafd50261 100644 --- a/imblearn/tests/test_pipeline.py +++ b/imblearn/tests/test_pipeline.py @@ -49,7 +49,7 @@ R_TOL = 1e-4 -class NoFit: +class NoFit(BaseEstimator): """Small class to test parameter dispatching.""" def __init__(self, a=None, b=None): @@ -109,6 +109,9 @@ def predict(self, X): def score(self, X, y=None): return np.sum(X) + def __sklearn_is_fitted__(self): + return True + class FitParamT(BaseEstimator): """Mock classifier""" @@ -118,6 +121,7 @@ def __init__(self): def fit(self, X, y, should_succeed=False): self.successful = should_succeed + self.fitted_ = True def predict(self, X): return self.successful @@ -146,6 +150,9 @@ def fit(self, X, y): class DummyEstimatorParams(BaseEstimator): """Mock classifier that takes params on predict""" + def __sklearn_is_fitted__(self): + return True + def fit(self, X, y): return self From 762fa4836a6912c1330fa91a126d8dc55eba9a87 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 13 Nov 2024 23:54:28 +0100 Subject: [PATCH 20/20] iter --- imblearn/tests/test_common.py | 19 +++++++++- .../utils/_test_common/instance_generator.py | 37 ++++++++++++++++++- imblearn/utils/estimator_checks.py | 1 - 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/imblearn/tests/test_common.py b/imblearn/tests/test_common.py index 4028f439a..43028a33c 100644 --- a/imblearn/tests/test_common.py +++ b/imblearn/tests/test_common.py @@ -9,8 +9,10 @@ import numpy as np import pytest +import sklearn from sklearn.base import clone from sklearn.exceptions import ConvergenceWarning +from sklearn.utils.fixes import parse_version from sklearn.utils._testing import ignore_warnings from sklearn.utils.estimator_checks import ( parametrize_with_checks as parametrize_with_checks_sklearn, @@ -27,9 +29,18 @@ from imblearn.utils.testing import all_estimators from imblearn.utils._test_common.instance_generator import ( _get_check_estimator_ids, + _get_expected_failed_checks, _tested_estimators, ) +sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) +if sklearn_version >= parse_version("1.6"): + kwargs_parametrize_with_checks = { + "expected_failed_checks": _get_expected_failed_checks + } +else: + kwargs_parametrize_with_checks = {} + @pytest.mark.parametrize("name, Estimator", all_estimators()) def test_all_estimator_no_base_class(name, Estimator): @@ -38,13 +49,17 @@ def test_all_estimator_no_base_class(name, Estimator): assert not name.lower().startswith("base"), msg -@parametrize_with_checks_sklearn(list(_tested_estimators())) +@parametrize_with_checks_sklearn( + list(_tested_estimators()), **kwargs_parametrize_with_checks +) def test_estimators_compatibility_sklearn(estimator, check, request): _set_checking_parameters(estimator) check(estimator) 
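
As context for the `expected_failed_checks` plumbing above: from scikit-learn 1.6
onwards, `parametrize_with_checks` accepts an `expected_failed_checks` callable that
maps an estimator instance to a `{check_name: reason}` dict; the listed checks are
then reported as expected failures (xfail) instead of hard errors. A minimal sketch
of the pattern, assuming this patch is applied (the estimator and check name simply
mirror the xfail table added below; the test function name is illustrative, not part
of the patch):

    from imblearn.under_sampling import NearMiss
    from imblearn.utils.estimator_checks import parametrize_with_checks

    def expected_failed_checks(estimator):
        # Checks listed here are collected as xfail rather than failing the run.
        if isinstance(estimator, NearMiss):
            return {"check_samplers_fit_resample": "FIXME"}
        return {}

    @parametrize_with_checks(
        [NearMiss()], expected_failed_checks=expected_failed_checks
    )
    def test_estimator_xfail(estimator, check):
        check(estimator)
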
-@parametrize_with_checks(list(_tested_estimators()))
+@parametrize_with_checks(
+    list(_tested_estimators()), expected_failed_checks=_get_expected_failed_checks
+)
 def test_estimators_imblearn(estimator, check, request):
     # Common tests for estimator instances
     with ignore_warnings(
diff --git a/imblearn/utils/_test_common/instance_generator.py b/imblearn/utils/_test_common/instance_generator.py
index 64ee971e2..82fdebe25 100644
--- a/imblearn/utils/_test_common/instance_generator.py
+++ b/imblearn/utils/_test_common/instance_generator.py
@@ -19,6 +19,7 @@
     BalancedBaggingClassifier,
     BalancedRandomForestClassifier,
     EasyEnsembleClassifier,
+    RUSBoostClassifier,
 )
 from imblearn.over_sampling import (
     ADASYN,
@@ -83,7 +84,13 @@
 # same check with multiple instances of the same estimator with different parameters.
 # The special key "*" allows to apply the parameters to all checks.
 # TODO(devtools): allow third-party developers to pass test specific params to checks
-PER_ESTIMATOR_CHECK_PARAMS: dict = {}
+PER_ESTIMATOR_CHECK_PARAMS: dict = {
+    Pipeline: {
+        "check_classifiers_with_encoded_labels": dict(
+            sampler__sampling_strategy={"setosa": 20, "virginica": 20}
+        )
+    }
+}
 
 SKIPPED_ESTIMATORS = [SMOTENC]
 
@@ -187,3 +194,31 @@
     estimator = clone(estimator_orig)
     estimator.set_params(**params)
     yield estimator
+
+
+PER_ESTIMATOR_XFAIL_CHECKS = {
+    BalancedRandomForestClassifier: {
+        "check_sample_weight_equivalence": "FIXME",
+    },
+    NearMiss: {
+        "check_samplers_fit_resample": "FIXME",
+    },
+    Pipeline: {
+        "check_dont_overwrite_parameters": (
+            "Pipeline changes the `steps` parameter, which it shouldn't. "
+            "Therefore this test is x-fail until we fix this."
+        ),
+        "check_estimators_overwrite_params": (
+            "Pipeline changes the `steps` parameter, which it shouldn't. "
+            "Therefore this test is x-fail until we fix this."
+        ),
+    },
+    RUSBoostClassifier: {
+        "check_sample_weight_equivalence": "FIXME",
+    },
+}
+
+def _get_expected_failed_checks(estimator):
+    """Get the checks expected to fail for the given imbalanced-learn estimator."""
+    failed_checks = PER_ESTIMATOR_XFAIL_CHECKS.get(type(estimator), {})
+    return failed_checks
\ No newline at end of file
diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index ffb5de129..dfba7e50d 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -668,7 +668,6 @@ def check_classifiers_with_encoded_labels(name, classifier_orig):
             "virginica": 50,
         },
     )
-    classifier.set_params(sampling_strategy={"setosa": 20, "virginica": 20})
     classifier.fit(df, y)
     assert set(classifier.classes_) == set(y.cat.categories.tolist())
     y_pred = classifier.predict(df)
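
A closing note on the `_raise_or_warn_if_not_fitted` helper added to
`imblearn/pipeline.py` above: when a sub-estimator raises `NotFittedError`, the
pipeline re-raises it with a clearer message; when no step complains but the
pipeline itself was never fitted, only a `FutureWarning` is emitted for now (a hard
error from 1.8 on, per the TODO). A rough usage sketch, assuming the patch is
applied:

    import numpy as np
    from sklearn.exceptions import NotFittedError
    from sklearn.linear_model import LogisticRegression

    from imblearn.pipeline import Pipeline
    from imblearn.under_sampling import RandomUnderSampler

    pipe = Pipeline(
        steps=[("sampler", RandomUnderSampler()), ("clf", LogisticRegression())]
    )
    X = np.array([[0.0], [1.0], [2.0], [3.0]])

    try:
        pipe.predict(X)  # never fitted: LogisticRegression raises NotFittedError
    except NotFittedError as exc:
        print(exc)  # "Pipeline is not fitted yet."

The warning branch is exercised by steps that do not track a fitted state, which is
why the mocks in `test_pipeline.py` gain `__sklearn_is_fitted__` or a `fitted_`
attribute in this patch: `check_is_fitted` must be able to tell the two cases apart.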