12 changes: 6 additions & 6 deletions README.rst
@@ -28,13 +28,13 @@
 .. _Black: :target: https://github.com/psf/black

 .. |PythonMinVersion| replace:: 3.10
-.. |NumPyMinVersion| replace:: 1.24.3
-.. |SciPyMinVersion| replace:: 1.10.1
-.. |ScikitLearnMinVersion| replace:: 1.3.2
+.. |NumPyMinVersion| replace:: 1.25.2
+.. |SciPyMinVersion| replace:: 1.11.4
+.. |ScikitLearnMinVersion| replace:: 1.4.2
 .. |MatplotlibMinVersion| replace:: 3.7.3
-.. |PandasMinVersion| replace:: 1.5.3
-.. |TensorflowMinVersion| replace:: 2.13.1
-.. |KerasMinVersion| replace:: 3.0.5
+.. |PandasMinVersion| replace:: 2.0.3
+.. |TensorflowMinVersion| replace:: 2.16.1
+.. |KerasMinVersion| replace:: 3.3.3
 .. |SeabornMinVersion| replace:: 0.12.2
 .. |PytestMinVersion| replace:: 7.2.2

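Both README.rst and doc/install.rst (below) bump the same minimum versions. A quick environment check against the new floors, as an illustrative snippet that is not part of this diff; the |...MinVersion| substitutions above stay authoritative:

# Illustrative only: check an environment against the new minimums.
from importlib.metadata import version
from packaging.version import parse

minimums = {
    "numpy": "1.25.2",
    "scipy": "1.11.4",
    "scikit-learn": "1.4.2",
    "pandas": "2.0.3",
}
for package, floor in minimums.items():
    installed = parse(version(package))
    assert installed >= parse(floor), f"{package} {installed} < {floor}"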
12 changes: 6 additions & 6 deletions doc/install.rst
@@ -8,13 +8,13 @@ Prerequisites
 =============

 .. |PythonMinVersion| replace:: 3.10
-.. |NumPyMinVersion| replace:: 1.24.3
-.. |SciPyMinVersion| replace:: 1.10.1
-.. |ScikitLearnMinVersion| replace:: 1.3.2
+.. |NumPyMinVersion| replace:: 1.25.2
+.. |SciPyMinVersion| replace:: 1.11.4
+.. |ScikitLearnMinVersion| replace:: 1.4.2
 .. |MatplotlibMinVersion| replace:: 3.7.3
-.. |PandasMinVersion| replace:: 1.5.3
-.. |TensorflowMinVersion| replace:: 2.13.1
-.. |KerasMinVersion| replace:: 3.0.5
+.. |PandasMinVersion| replace:: 2.0.3
+.. |TensorflowMinVersion| replace:: 2.16.1
+.. |KerasMinVersion| replace:: 3.3.3
 .. |SeabornMinVersion| replace:: 0.12.2
 .. |PytestMinVersion| replace:: 7.2.2

9 changes: 6 additions & 3 deletions doc/whats_new/0.14.rst
@@ -1,9 +1,9 @@
 .. _changes_0_14:

-Version 0.14.0 (Under development)
-==================================
+Version 0.14.0
+==============

-**TBD**
+**August 14, 2025**

 Changelog
 ---------
@@ -21,5 +21,8 @@ Enhancements
 Compatibility
 .............

+- Compatibility with scikit-learn 1.7
+  :pr:`1137`, :pr:`1145`, :pr:`1146` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Deprecations
 ............
8 changes: 4 additions & 4 deletions examples/model_selection/plot_instance_hardness_cv.py
@@ -28,15 +28,15 @@
 from sklearn.datasets import make_blobs

 X, y = make_blobs(n_samples=[950, 50], centers=((-3, 0), (3, 0)), random_state=10)
-plt.scatter(X[:, 0], X[:, 1], c=y)
+_ = plt.scatter(X[:, 0], X[:, 1], c=y)

 # %%
 # To introduce instance hardness in our dataset, we add some hard to classify samples:
 X_hard, y_hard = make_blobs(
     n_samples=10, centers=((3, 0), (-3, 0)), cluster_std=1, random_state=10
 )
 X, y = np.vstack((X, X_hard)), np.hstack((y, y_hard))
-plt.scatter(X[:, 0], X[:, 1], c=y)
+_ = plt.scatter(X[:, 0], X[:, 1], c=y)

 # %%
 # Compare cross validation scores using `StratifiedKFold` and `InstanceHardnessCV`
@@ -69,7 +69,7 @@
 results = {}
 for cv in (
     StratifiedKFold(n_splits=5, shuffle=True, random_state=10),
-    InstanceHardnessCV(estimator=LogisticRegression(), n_splits=5, random_state=10),
+    InstanceHardnessCV(estimator=LogisticRegression()),
 ):
     result = cross_validate(
         logistic_regression,
@@ -83,7 +83,7 @@

 # %%
 ax = results.plot.box(vert=False, whis=[0, 100])
-ax.set(
+_ = ax.set(
     xlabel="Average precision",
     title="Cross validation scores with different splitters",
     xlim=(0, 1),
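Two patterns recur in this example's update: plot calls are assigned to `_` so the rendered gallery does not echo the matplotlib return values, and `InstanceHardnessCV` is now constructed from just an estimator, without `n_splits` or `random_state`. A minimal sketch of the updated construction, assuming the import path implied by the example's location (`imblearn.model_selection`):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

from imblearn.model_selection import InstanceHardnessCV  # assumed path

# Synthetic imbalanced data stands in for the example's blobs.
X, y = make_classification(weights=[0.9], random_state=10)
cv = InstanceHardnessCV(estimator=LogisticRegression())  # no n_splits/random_state
scores = cross_validate(LogisticRegression(), X, y, cv=cv, scoring="average_precision")
print(scores["test_score"])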
13 changes: 2 additions & 11 deletions imblearn/ensemble/_bagging.py
@@ -12,15 +12,13 @@
 from sklearn.ensemble import BaggingClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.utils._param_validation import HasMethods, Interval, StrOptions
-from sklearn.utils.fixes import parse_version

 from ..pipeline import Pipeline
 from ..under_sampling import RandomUnderSampler
 from ..under_sampling.base import BaseUnderSampler
 from ..utils import Substitution, check_sampling_strategy, check_target_type
 from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
-from ..utils._sklearn_compat import _fit_context, sklearn_version
-from ._common import _bagging_parameter_constraints
+from ..utils._sklearn_compat import _fit_context


 @Substitution(
@@ -224,11 +222,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
"""

# make a deepcopy to not modify the original dictionary
if sklearn_version >= parse_version("1.4"):
_parameter_constraints = copy.deepcopy(BaggingClassifier._parameter_constraints)
else:
_parameter_constraints = copy.deepcopy(_bagging_parameter_constraints)

_parameter_constraints = copy.deepcopy(BaggingClassifier._parameter_constraints)
_parameter_constraints.update(
{
"sampling_strategy": [
@@ -241,9 +235,6 @@ class BalancedBaggingClassifier(BaggingClassifier):
"sampler": [HasMethods(["fit_resample"]), None],
}
)
# TODO: remove when minimum supported version of scikit-learn is 1.4
if "base_estimator" in _parameter_constraints:
del _parameter_constraints["base_estimator"]

def __init__(
self,
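The unconditional deep copy above is load-bearing: `_parameter_constraints` is a class attribute, so updating it through a plain reference would mutate `BaggingClassifier`'s own constraint table. A small self-contained sketch of the hazard:

import copy

parent = {"n_estimators": [int]}  # stand-in for a constraints dict

aliased = parent  # no copy: both names share one dict
aliased.update({"sampler": [object]})
assert "sampler" in parent  # the parent was mutated

parent = {"n_estimators": [int]}
copied = copy.deepcopy(parent)  # independent copy
copied.update({"sampler": [object]})
assert "sampler" not in parent  # the parent is untouched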
9 changes: 1 addition & 8 deletions imblearn/ensemble/_common.py
@@ -19,10 +19,8 @@ def _estimator_has(attr):
     def check(self):
         if hasattr(self, "estimators_"):
             return hasattr(self.estimators_[0], attr)
-        elif self.estimator is not None:
+        else:  # self.estimator is not None
             return hasattr(self.estimator, attr)
-        else:  # TODO(1.4): Remove when the base_estimator deprecation cycle ends
-            return hasattr(self.base_estimator, attr)

     return check

@@ -45,11 +43,6 @@ def check(self):
"n_jobs": [None, Integral],
"random_state": ["random_state"],
"verbose": ["verbose"],
"base_estimator": [
HasMethods(["fit", "predict"]),
StrOptions({"deprecated"}),
None,
],
}

_adaboost_classifier_parameter_constraints = {
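After the cleanup, `_estimator_has` needs only two cases: a fitted ensemble checks its first fitted member, and anything else falls back to the configured `estimator`. A self-contained sketch of how such a predicate pairs with scikit-learn's `available_if` (the `Meta` wrapper is hypothetical, for illustration only):

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils.metaestimators import available_if


def _estimator_has(attr):
    def check(self):
        if hasattr(self, "estimators_"):
            return hasattr(self.estimators_[0], attr)
        else:  # self.estimator is not None
            return hasattr(self.estimator, attr)

    return check


class Meta:  # hypothetical wrapper
    def __init__(self, estimator):
        self.estimator = estimator

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        return self.estimator.decision_function(X)


print(hasattr(Meta(SVC()), "decision_function"))  # True: SVC exposes it
print(hasattr(Meta(DecisionTreeClassifier()), "decision_function"))  # False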
64 changes: 1 addition & 63 deletions imblearn/ensemble/_easy_ensemble.py
@@ -5,19 +5,13 @@
 # License: MIT

 import copy
-import inspect
 import numbers

 import numpy as np
 from sklearn.base import clone
 from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
-from sklearn.ensemble._bagging import _parallel_decision_function
-from sklearn.ensemble._base import _partition_estimators
 from sklearn.utils._param_validation import Interval, StrOptions
 from sklearn.utils.fixes import parse_version
-from sklearn.utils.metaestimators import available_if
-from sklearn.utils.parallel import Parallel, delayed
-from sklearn.utils.validation import check_is_fitted

 from ..pipeline import Pipeline
 from ..under_sampling import RandomUnderSampler
@@ -28,9 +22,8 @@
     _fit_context,
     get_tags,
     sklearn_version,
-    validate_data,
 )
-from ._common import _bagging_parameter_constraints, _estimator_has
+from ._common import _bagging_parameter_constraints

 MAX_INT = np.iinfo(np.int32).max

@@ -276,61 +269,6 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         # None.
         return super()._fit(X, y, self.max_samples)

-    # TODO: remove when minimum supported version of scikit-learn is 1.1
-    @available_if(_estimator_has("decision_function"))
-    def decision_function(self, X):
-        """Average of the decision functions of the base classifiers.
-
-        Parameters
-        ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            The training input samples. Sparse matrices are accepted only if
-            they are supported by the base estimator.
-
-        Returns
-        -------
-        score : ndarray of shape (n_samples, k)
-            The decision function of the input samples. The columns correspond
-            to the classes in sorted order, as they appear in the attribute
-            ``classes_``. Regression and binary classification are special
-            cases with ``k == 1``, otherwise ``k==n_classes``.
-        """
-        check_is_fitted(self)
-
-        # Check data
-        X = validate_data(
-            self,
-            X=X,
-            accept_sparse=["csr", "csc"],
-            dtype=None,
-            ensure_all_finite=(
-                "allow_nan" if get_tags(self).input_tags.allow_nan else True
-            ),
-            reset=False,
-        )
-
-        # Parallel loop
-        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
-
-        kwargs = {}
-        if "params" in inspect.signature(_parallel_decision_function).parameters:
-            kwargs["params"] = {}
-
-        all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
-            delayed(_parallel_decision_function)(
-                self.estimators_[starts[i] : starts[i + 1]],
-                self.estimators_features_[starts[i] : starts[i + 1]],
-                X,
-                **kwargs,
-            )
-            for i in range(n_jobs)
-        )
-
-        # Reduce
-        decisions = sum(all_decisions) / self.n_estimators
-
-        return decisions
-
     @property
     def base_estimator_(self):
         """Attribute for older sklearn version compatibility."""
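With the backport removed, `EasyEnsembleClassifier` inherits `decision_function` straight from `BaggingClassifier`, which performs the same partition, parallel-apply, and average reduction. A quick usage sketch on synthetic data:

from sklearn.datasets import make_classification

from imblearn.ensemble import EasyEnsembleClassifier

X, y = make_classification(weights=[0.9], random_state=0)
clf = EasyEnsembleClassifier(n_estimators=5, random_state=0).fit(X, y)
scores = clf.decision_function(X)  # now provided by BaggingClassifier
print(scores.shape)  # (100,) for binary classification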
55 changes: 15 additions & 40 deletions imblearn/ensemble/_forest.py
@@ -74,12 +74,9 @@ def _local_parallel_build_trees(
"bootstrap": bootstrap,
}

if sklearn_version >= parse_version("1.4"):
# TODO: remove when the minimum supported version of scikit-learn will be 1.4
# support for missing values
params_parallel_build_trees["missing_values_in_feature_mask"] = (
missing_values_in_feature_mask
)
params_parallel_build_trees["missing_values_in_feature_mask"] = (
missing_values_in_feature_mask
)

tree = _parallel_build_trees(**params_parallel_build_trees)

@@ -469,20 +466,9 @@ def __init__(
"min_impurity_decrease": min_impurity_decrease,
"ccp_alpha": ccp_alpha,
"max_samples": max_samples,
"monotonic_cst": monotonic_cst,
}
# TODO: remove when the minimum supported version of scikit-learn will be 1.4
if sklearn_version >= parse_version("1.4"):
# use scikit-learn support for monotonic constraints
params_random_forest["monotonic_cst"] = monotonic_cst
else:
if monotonic_cst is not None:
raise ValueError(
"Monotonic constraints are not supported for scikit-learn "
"version < 1.4."
)
# create an attribute for compatibility with other scikit-learn tools such
# as HTML representation.
self.monotonic_cst = monotonic_cst

super().__init__(**params_random_forest)

self.sampling_strategy = sampling_strategy
@@ -548,37 +534,26 @@ def fit(self, X, y, sample_weight=None):
         if issparse(y):
             raise ValueError("sparse multilabel-indicator for y is not supported.")

-        # TODO: remove when the minimum supported version of scipy will be 1.4
-        # Support for missing values
-        if sklearn_version >= parse_version("1.4"):
-            ensure_all_finite = False
-        else:
-            ensure_all_finite = True
-
         X, y = validate_data(
             self,
             X=X,
             y=y,
             multi_output=True,
             accept_sparse="csc",
             dtype=DTYPE,
-            ensure_all_finite=ensure_all_finite,
+            ensure_all_finite=False,
         )

-        # TODO: remove when the minimum supported version of scikit-learn will be 1.4
-        if sklearn_version >= parse_version("1.4"):
-            # _compute_missing_values_in_feature_mask checks if X has missing values and
-            # will raise an error if the underlying tree base estimator can't handle
-            # missing values. Only the criterion is required to determine if the tree
-            # supports missing values.
-            estimator = type(self.estimator)(criterion=self.criterion)
-            missing_values_in_feature_mask = (
-                estimator._compute_missing_values_in_feature_mask(
-                    X, estimator_name=self.__class__.__name__
-                )
-            )
-        else:
-            missing_values_in_feature_mask = None
+        # _compute_missing_values_in_feature_mask checks if X has missing values and
+        # will raise an error if the underlying tree base estimator can't handle
+        # missing values. Only the criterion is required to determine if the tree
+        # supports missing values.
+        estimator = type(self.estimator)(criterion=self.criterion)
+        missing_values_in_feature_mask = (
+            estimator._compute_missing_values_in_feature_mask(
+                X, estimator_name=self.__class__.__name__
+            )
+        )

         if sample_weight is not None:
             sample_weight = _check_sample_weight(sample_weight, X)
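With the branches gone, `fit` always validates with `ensure_all_finite=False` and builds the missing-value mask, deferring NaN handling to the scikit-learn >= 1.4 trees. A sketch of what this enables, assuming the default criterion's native missing-value support:

import numpy as np
from sklearn.datasets import make_classification

from imblearn.ensemble import BalancedRandomForestClassifier

X, y = make_classification(weights=[0.9], random_state=0)
X[::10, 0] = np.nan  # NaNs now flow through to the trees' native handling

clf = BalancedRandomForestClassifier(n_estimators=10, random_state=0)
clf.fit(X, y)  # no "Input contains NaN" error on scikit-learn >= 1.4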
4 changes: 2 additions & 2 deletions imblearn/keras/_generator.py
@@ -20,7 +20,7 @@ def import_from_keras():
         if hasattr(keras.utils, "Sequence"):
             return (keras.utils.Sequence,), True
         else:
-            return (keras.utils.data_utils.Sequence,), True
+            return (keras.utils.PyDataset,), True
     except ImportError:
         return tuple(), False

@@ -31,7 +31,7 @@ def import_from_tensforflow():
         if hasattr(keras.utils, "Sequence"):
             return (keras.utils.Sequence,), True
         else:
-            return (keras.utils.data_utils.Sequence,), True
+            return (keras.utils.PyDataset,), True
     except ImportError:
         return tuple(), False

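Both helpers probe the same way: prefer `keras.utils.Sequence` where it still exists, otherwise fall back to the Keras 3 `PyDataset` base class instead of the removed `keras.utils.data_utils` module. A condensed sketch of the shared logic (the helper name is hypothetical):

def _keras_sequence_base():
    """Return (base_classes, is_available) for the balanced batch generator."""
    try:
        import keras

        if hasattr(keras.utils, "Sequence"):  # Keras 2-style API
            return (keras.utils.Sequence,), True
        return (keras.utils.PyDataset,), True  # Keras 3 base class
    except ImportError:
        return tuple(), False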