
Commit bf260c4

Merge remote-tracking branch 'origin' into pr/fritshermans/1125
2 parents 1c642ce + 0780bc9 commit bf260c4

10 files changed (+9020 / -9169 lines)

.github/workflows/linters.yml

Lines changed: 2 additions & 2 deletions
@@ -15,9 +15,9 @@ jobs:
 
     steps:
       - uses: actions/checkout@v4
-      - uses: prefix-dev/[email protected].1
+      - uses: prefix-dev/[email protected].14
         with:
-          pixi-version: v0.39.2
+          pixi-version: v0.51.0
           frozen: true
 
       - name: Run tests

.github/workflows/tests.yml

Lines changed: 4 additions & 3 deletions
@@ -20,6 +20,7 @@ jobs:
           ci-py310-min-tensorflow,
           ci-py311-sklearn-1-4,
           ci-py311-sklearn-1-5,
+          ci-py312-sklearn-1-6,
           ci-py311-latest-keras,
           ci-py311-latest-tensorflow,
           ci-py313-latest-dependencies,
@@ -37,9 +38,9 @@ jobs:
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
-      - uses: prefix-dev/[email protected].1
+      - uses: prefix-dev/[email protected].14
         with:
-          pixi-version: v0.39.2
+          pixi-version: v0.51.0
           environments: ${{ matrix.environment }}
           # we can freeze the environment and manually bump the dependencies to the
           # latest version time to time.
@@ -49,7 +50,7 @@ jobs:
         run: pixi run -e ${{ matrix.environment }} tests -n 3
 
       - name: Upload coverage reports to Codecov
-        uses: codecov/codecov-action@v5.1.2
+        uses: codecov/codecov-action@v5.4.3
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           slug: scikit-learn-contrib/imbalanced-learn

doc/ensemble.rst

Lines changed: 6 additions & 6 deletions
@@ -19,7 +19,7 @@ Bagging classifier
 In ensemble classifiers, bagging methods build several estimators on different
 randomly selected subset of data. In scikit-learn, this classifier is named
 :class:`~sklearn.ensemble.BaggingClassifier`. However, this classifier does not
-allow to balance each subset of data. Therefore, when training on imbalanced
+allow each subset of data to be balanced. Therefore, when training on an imbalanced
 data set, this classifier will favor the majority classes::
 
     >>> from sklearn.datasets import make_classification
@@ -59,10 +59,10 @@ sampling is controlled by the parameter `sampler` or the two parameters
     >>> balanced_accuracy_score(y_test, y_pred)
     0.8...
 
-Changing the `sampler` will give rise to different known implementation
+Changing the `sampler` will give rise to different known implementations
 :cite:`maclin1997empirical`, :cite:`hido2009roughly`,
-:cite:`wang2009diversity`. You can refer to the following example shows in
-practice these different methods:
+:cite:`wang2009diversity`. You can refer to the following example which shows these
+different methods in practice:
 :ref:`sphx_glr_auto_examples_ensemble_plot_bagging_classifier.py`
 
 .. _forest:
@@ -93,7 +93,7 @@ Boosting
 
 Several methods taking advantage of boosting have been designed.
 
-:class:`RUSBoostClassifier` randomly under-sample the dataset before to perform
+:class:`RUSBoostClassifier` randomly under-samples the dataset before performing
 a boosting iteration :cite:`seiffert2009rusboost`::
 
     >>> from imblearn.ensemble import RUSBoostClassifier
@@ -107,7 +107,7 @@ a boosting iteration :cite:`seiffert2009rusboost`::
 
 A specific method which uses :class:`~sklearn.ensemble.AdaBoostClassifier` as
 learners in the bagging classifier is called "EasyEnsemble". The
-:class:`EasyEnsembleClassifier` allows bagging AdaBoost learners which are
+:class:`EasyEnsembleClassifier` allows bagging AdaBoost learners which are
 trained on balanced bootstrap samples :cite:`liu2008exploratory`. Similarly to
 the :class:`BalancedBaggingClassifier` API, one can construct the ensemble as::
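
To make the documented API in the edited text above concrete, here is a minimal, runnable sketch (not part of this commit); the toy dataset, the choice of `RandomUnderSampler` as the `sampler`, and the hyperparameter values are illustrative assumptions:

    # Illustrative sketch only; not taken from the diff above.
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    from imblearn.ensemble import BalancedBaggingClassifier, EasyEnsembleClassifier
    from imblearn.under_sampling import RandomUnderSampler

    # Imbalanced toy data (90%/10% class split chosen for illustration).
    X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # BalancedBaggingClassifier: each bootstrap sample is re-balanced by the
    # given `sampler` before an estimator is fitted on it.
    bag = BalancedBaggingClassifier(
        sampler=RandomUnderSampler(), n_estimators=10, random_state=0
    )
    bag.fit(X_train, y_train)
    print("balanced bagging accuracy:", bag.score(X_test, y_test))

    # EasyEnsembleClassifier: bags AdaBoost learners trained on balanced
    # bootstrap samples, as described in the text above.
    easy = EasyEnsembleClassifier(n_estimators=10, random_state=0)
    easy.fit(X_train, y_train)
    print("easy ensemble accuracy:", easy.score(X_test, y_test))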

imblearn/ensemble/_easy_ensemble.py

Lines changed: 7 additions & 2 deletions
@@ -5,6 +5,7 @@
 # License: MIT
 
 import copy
+import inspect
 import numbers
 
 import numpy as np
@@ -13,7 +14,6 @@
 from sklearn.ensemble._bagging import _parallel_decision_function
 from sklearn.ensemble._base import _partition_estimators
 from sklearn.utils._param_validation import Interval, StrOptions
-from sklearn.utils._tags import _safe_tags
 from sklearn.utils.fixes import parse_version
 from sklearn.utils.metaestimators import available_if
 from sklearn.utils.parallel import Parallel, delayed
@@ -312,11 +312,16 @@ def decision_function(self, X):
         # Parallel loop
         n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
 
+        kwargs = {}
+        if "params" in inspect.signature(_parallel_decision_function).parameters:
+            kwargs["params"] = {}
+
         all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
             delayed(_parallel_decision_function)(
                 self.estimators_[starts[i] : starts[i + 1]],
                 self.estimators_features_[starts[i] : starts[i + 1]],
                 X,
+                **kwargs,
             )
             for i in range(n_jobs)
         )
@@ -343,7 +348,7 @@ def _get_estimator(self):
         return self.estimator
 
     def _more_tags(self):
-        return {"allow_nan": _safe_tags(self._get_estimator(), "allow_nan")}
+        return {"allow_nan": get_tags(self._get_estimator()).input_tags.allow_nan}
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
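
The `inspect.signature` guard added in `decision_function` above is a general pattern for calling a private helper whose signature differs across library versions. A minimal, self-contained sketch of the same idea (`callee_old` and `callee_new` are hypothetical stand-ins, not scikit-learn functions):

    # Forward a keyword argument only when the callee actually accepts it.
    import inspect

    def callee_old(x):
        # Hypothetical older API: no `params` keyword.
        return x

    def callee_new(x, params=None):
        # Hypothetical newer API: expects a `params` keyword.
        return x, params

    def call_compat(func, x):
        kwargs = {}
        if "params" in inspect.signature(func).parameters:
            kwargs["params"] = {}
        return func(x, **kwargs)

    print(call_compat(callee_old, 1))  # -> 1
    print(call_compat(callee_new, 1))  # -> (1, {})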

imblearn/keras/tests/test_generator.py

Lines changed: 4 additions & 2 deletions
@@ -24,7 +24,8 @@ def data():
     X, y = make_imbalance(
         iris.data, iris.target, sampling_strategy={0: 30, 1: 50, 2: 40}
     )
-    y = LabelBinarizer().fit_transform(y)
+    X = X.astype(np.float32)
+    y = LabelBinarizer().fit_transform(y).astype(np.int32)
     return X, y
 
 
@@ -103,7 +104,7 @@ def test_balanced_batch_generator_function_no_return_indices(data):
         (None, None),
         (RandomOverSampler(), None),
         (NearMiss(), None),
-        (None, np.random.uniform(size=120)),
+        (None, np.random.uniform(size=120).astype(np.float32)),
     ],
 )
 def test_balanced_batch_generator_function(data, sampler, sample_weight):
@@ -117,6 +118,7 @@ def test_balanced_batch_generator_function(data, sampler, sample_weight):
         batch_size=10,
         random_state=42,
     )
+    print(next(training_generator))
     model.fit(
         training_generator,
         steps_per_epoch=steps_per_epoch,
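
For context, a minimal sketch (not part of this commit) of how `balanced_batch_generator` from `imblearn.keras` is used together with the explicit dtype casts added in the fixture above; it assumes TensorFlow/Keras is installed, and the stated motivation for the casts (stricter dtype handling in newer Keras releases) is an inference, not something the diff spells out:

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.preprocessing import LabelBinarizer

    from imblearn.datasets import make_imbalance
    from imblearn.keras import balanced_batch_generator  # requires keras/tensorflow

    iris = load_iris()
    X, y = make_imbalance(
        iris.data, iris.target, sampling_strategy={0: 30, 1: 50, 2: 40}
    )
    # Explicit casts, as in the updated test fixture above.
    X = X.astype(np.float32)
    y = LabelBinarizer().fit_transform(y).astype(np.int32)

    training_generator, steps_per_epoch = balanced_batch_generator(
        X, y, batch_size=10, random_state=42
    )
    # `training_generator` and `steps_per_epoch` can then be passed to model.fit(...).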

imblearn/metrics/tests/test_classification.py

Lines changed: 1 addition & 1 deletion
@@ -454,7 +454,7 @@ def test_iba_error_y_score_prob_error(score_loss):
     y_true, y_pred, _ = make_prediction(binary=True)
 
     aps = make_index_balanced_accuracy(alpha=0.5, squared=True)(score_loss)
-    with pytest.raises(AttributeError):
+    with pytest.raises((AttributeError, TypeError)):
         aps(y_true, y_pred)

imblearn/over_sampling/_smote/base.py

Lines changed: 0 additions & 1 deletion
@@ -981,6 +981,5 @@ def _more_tags(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = False
         tags.input_tags.string = True
         return tags

imblearn/tests/test_pipeline.py

Lines changed: 4 additions & 1 deletion
@@ -39,7 +39,7 @@
 from imblearn.pipeline import Pipeline, make_pipeline
 from imblearn.under_sampling import EditedNearestNeighbours as ENN
 from imblearn.under_sampling import RandomUnderSampler
-from imblearn.utils._sklearn_compat import sklearn_version
+from imblearn.utils._sklearn_compat import Tags, sklearn_version
 from imblearn.utils.estimator_checks import check_param_validation
 
 JUNK_FOOD_DOCS = (
@@ -61,6 +61,9 @@ def __init__(self, a=None, b=None):
         self.a = a
         self.b = b
 
+    def __sklearn_tags__(self):
+        return Tags()
+
 
 class NoTrans(NoFit):
     def fit(self, X, y):

0 commit comments