Skip to content

Commit b037591

Browse files
authored
MNT adapt code for scikit-learn 0.23 (#710)
1 parent b861b3a commit b037591

File tree

19 files changed

+328
-296
lines changed

19 files changed

+328
-296
lines changed

azure-pipelines.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ jobs:
7272
- template: build_tools/azure/posix.yml
7373
parameters:
7474
name: macOS
75-
vmImage: xcode9-macos10.13
75+
vmImage: macOS-10.14
7676
dependsOn: [linting]
7777
matrix:
7878
pylatest_conda_mkl:

build_tools/azure/install.cmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if "%COVERAGE%" == "true" (
3030
python --version
3131
pip --version
3232

33-
pip install git+https://github.com/scikit-learn/scikit-learn.git
33+
pip install scikit-learn
3434

3535
@rem Install the build and runtime dependencies of the project.
3636
python setup.py bdist_wheel bdist_wininst

build_tools/azure/install.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
3232
fi
3333

3434
make_conda $TO_INSTALL
35-
python -m pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
35+
python -m pip install scikit-learn
3636

3737
TO_INSTALL=""
3838

@@ -75,22 +75,22 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
7575
python3 -m virtualenv --system-site-packages --python=python3 $VIRTUALENV
7676
source $VIRTUALENV/bin/activate
7777
python -m pip install pytest==$PYTEST_VERSION pytest-cov joblib cython
78-
python -m pip install git+https://github.com/scikit-learn/scikit-learn.git
78+
python -m pip install scikit-learn
7979
elif [[ "$DISTRIB" == "ubuntu-32" ]]; then
8080
apt-get update
8181
apt-get install -y python3-dev python3-scipy libatlas3-base libatlas-base-dev python3-virtualenv git
8282
python3 -m virtualenv --system-site-packages --python=python3 $VIRTUALENV
8383
source $VIRTUALENV/bin/activate
8484
python -m pip install pytest==$PYTEST_VERSION pytest-cov joblib cython
85-
python -m pip install git+https://github.com/scikit-learn/scikit-learn.git
85+
python -m pip install scikit-learn
8686
elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
8787
# Since conda main channel usually lags behind on the latest releases,
8888
# we use pypi to test against the latest releases of the dependencies.
8989
# conda is still used as a convenient way to install Python and pip.
9090
make_conda "python=$PYTHON_VERSION"
9191
python -m pip install -U pip
9292
python -m pip install numpy scipy joblib cython
93-
python -m pip install git+https://github.com/scikit-learn/scikit-learn.git
93+
python -m pip install scikit-learn
9494
python -m pip install pytest==$PYTEST_VERSION pytest-cov pytest-xdist
9595
python -m pip install pandas
9696
fi

imblearn/base.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
from sklearn.base import BaseEstimator
1212
from sklearn.preprocessing import label_binarize
13-
from sklearn.utils import check_X_y
1413
from sklearn.utils.multiclass import check_classification_targets
1514

1615
from .utils import check_sampling_strategy, check_target_type
@@ -131,7 +130,9 @@ def _check_X_y(self, X, y, accept_sparse=None):
131130
if accept_sparse is None:
132131
accept_sparse = ["csr", "csc"]
133132
y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
134-
X, y = check_X_y(X, y, accept_sparse=accept_sparse)
133+
X, y = self._validate_data(
134+
X, y, reset=True, accept_sparse=accept_sparse
135+
)
135136
return X, y, binarize_y
136137

137138

imblearn/ensemble/_bagging.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,14 @@ def fit(self, X, y):
261261
# RandomUnderSampler does not support sample_weight. We need to pass
262262
# None.
263263
return self._fit(X, y, self.max_samples, sample_weight=None)
264+
265+
def _more_tags(self):
266+
tags = super()._more_tags()
267+
tags_key = "_xfail_checks"
268+
failing_test = "check_estimators_nan_inf"
269+
reason = "Fails because the sampler removed infinity and NaN values"
270+
if tags_key in tags:
271+
tags[tags_key][failing_test] = reason
272+
else:
273+
tags[tags_key] = {failing_test: reason}
274+
return tags

imblearn/ensemble/_forest.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from sklearn.utils import check_array
2626
from sklearn.utils import check_random_state
2727
from sklearn.utils import _safe_indexing
28+
from sklearn.utils.validation import _check_sample_weight
2829

2930
from ..pipeline import make_pipeline
3031
from ..under_sampling import RandomUnderSampler
@@ -412,10 +413,15 @@ def fit(self, X, y, sample_weight=None):
412413
"""
413414

414415
# Validate or convert input data
415-
X = check_array(X, accept_sparse="csc", dtype=DTYPE)
416-
y = check_array(y, accept_sparse="csc", ensure_2d=False, dtype=None)
416+
if issparse(y):
417+
raise ValueError(
418+
"sparse multilabel-indicator for y is not supported."
419+
)
420+
X, y = self._validate_data(X, y, multi_output=True,
421+
accept_sparse="csc", dtype=DTYPE)
417422
if sample_weight is not None:
418-
sample_weight = check_array(sample_weight, ensure_2d=False)
423+
sample_weight = _check_sample_weight(sample_weight, X)
424+
419425
if issparse(X):
420426
# Pre-sort indices to avoid that each individual tree of the
421427
# ensemble sorts the indices.

imblearn/metrics/_classification.py

Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@
1212
# Dariusz Brzezinski
1313
# License: MIT
1414

15-
import warnings
1615
import functools
17-
18-
from inspect import getcallargs
16+
import warnings
1917

2018
import numpy as np
2119
import scipy as sp
@@ -731,56 +729,56 @@ def make_index_balanced_accuracy(alpha=0.1, squared=True):
731729
def decorate(scoring_func):
732730
@functools.wraps(scoring_func)
733731
def compute_score(*args, **kwargs):
734-
# Create the list of tags
735-
tags_scoring_func = getcallargs(scoring_func, *args, **kwargs)
732+
signature_scoring_func = signature(scoring_func)
733+
params_scoring_func = set(signature_scoring_func.parameters.keys())
734+
736735
# check that the scoring function does not need a score
737736
# and only a prediction
738-
if (
739-
"y_score" in tags_scoring_func
740-
or "y_prob" in tags_scoring_func
741-
or "y2" in tags_scoring_func
742-
):
737+
prohibitied_y_pred = set(["y_score", "y_prob", "y2"])
738+
if prohibitied_y_pred.intersection(params_scoring_func):
743739
raise AttributeError(
744740
"The function {} has an unsupported"
745741
" attribute. Metric with`y_pred` are the"
746742
" only supported metrics is the only"
747-
" supported."
743+
" supported.".format(scoring_func.__name__)
748744
)
749-
# Compute the score from the scoring function
750-
_score = scoring_func(*args, **kwargs)
751-
# Square if desired
745+
746+
args_scoring_func = signature_scoring_func.bind(*args, **kwargs)
747+
args_scoring_func.apply_defaults()
748+
_score = scoring_func(
749+
*args_scoring_func.args, **args_scoring_func.kwargs
750+
)
752751
if squared:
753752
_score = np.power(_score, 2)
754-
# Get the signature of the sens/spec function
755-
sens_spec_sig = signature(sensitivity_specificity_support)
756-
# We need to extract from kwargs only the ones needed by the
757-
# sensitivity and specificity
758-
params_sens_spec = set(sens_spec_sig._parameters.keys())
759-
# Make the intersection between the parameters
760-
sel_params = params_sens_spec.intersection(set(tags_scoring_func))
761-
# Create a sub dictionary
762-
tags_scoring_func = {k: tags_scoring_func[k] for k in sel_params}
763-
# Check if the metric is the geometric mean
753+
754+
signature_sens_spec = signature(sensitivity_specificity_support)
755+
params_sens_spec = set(signature_sens_spec.parameters.keys())
756+
common_params = params_sens_spec.intersection(
757+
set(args_scoring_func.arguments.keys())
758+
)
759+
760+
args_sens_spec = {
761+
k: args_scoring_func.arguments[k] for k in common_params
762+
}
763+
764764
if scoring_func.__name__ == "geometric_mean_score":
765-
if "average" in tags_scoring_func:
766-
if tags_scoring_func["average"] == "multiclass":
767-
tags_scoring_func["average"] = "macro"
768-
# We do not support multilabel so the only average supported
769-
# is binary
765+
if "average" in args_sens_spec:
766+
if args_sens_spec["average"] == "multiclass":
767+
args_sens_spec["average"] = "macro"
770768
elif (
771769
scoring_func.__name__ == "accuracy_score"
772770
or scoring_func.__name__ == "jaccard_score"
773771
):
774-
tags_scoring_func["average"] = "binary"
775-
# Create the list of parameters through signature binding
776-
tags_sens_spec = sens_spec_sig.bind(**tags_scoring_func)
777-
# Call the sens/spec function
778-
sen, spe, _ = sensitivity_specificity_support(
779-
*tags_sens_spec.args, **tags_sens_spec.kwargs
772+
# We do not support multilabel so the only average supported
773+
# is binary
774+
args_sens_spec["average"] = "binary"
775+
776+
sensitivity, specificity, _ = sensitivity_specificity_support(
777+
**args_sens_spec
780778
)
781-
# Compute the dominance
782-
dom = sen - spe
783-
return (1.0 + alpha * dom) * _score
779+
780+
dominance = sensitivity - specificity
781+
return (1.0 + alpha * dominance) * _score
784782

785783
return compute_score
786784

imblearn/over_sampling/_random_over_sampler.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from collections import Counter
88

99
import numpy as np
10-
from sklearn.utils import check_array
1110
from sklearn.utils import check_random_state
1211
from sklearn.utils import _safe_indexing
1312

@@ -75,10 +74,9 @@ def __init__(self, sampling_strategy="auto", random_state=None):
7574

7675
def _check_X_y(self, X, y):
7776
y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
78-
X = check_array(X, accept_sparse=["csr", "csc"], dtype=None,
79-
force_all_finite=False)
80-
y = check_array(
81-
y, accept_sparse=["csr", "csc"], dtype=None, ensure_2d=False
77+
X, y = self._validate_data(
78+
X, y, reset=True, accept_sparse=["csr", "csc"], dtype=None,
79+
force_all_finite=False,
8280
)
8381
return X, y, binarize_y
8482

imblearn/over_sampling/_smote.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,8 @@ class SMOTENC(SMOTE):
872872
Resampled dataset samples per class Counter({0: 900, 1: 900})
873873
"""
874874

875+
_required_parameters = ["categorical_features"]
876+
875877
def __init__(
876878
self,
877879
categorical_features,

imblearn/pipeline.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -456,12 +456,8 @@ def make_pipeline(*steps, **kwargs):
456456
>>> from sklearn.preprocessing import StandardScaler
457457
>>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
458458
... # doctest: +NORMALIZE_WHITESPACE
459-
Pipeline(memory=None,
460-
steps=[('standardscaler',
461-
StandardScaler(copy=True, with_mean=True, with_std=True)),
462-
('gaussiannb',
463-
GaussianNB(priors=None, var_smoothing=1e-09))],
464-
verbose=False)
459+
Pipeline(steps=[('standardscaler', StandardScaler()),
460+
('gaussiannb', GaussianNB())])
465461
"""
466462
memory = kwargs.pop("memory", None)
467463
verbose = kwargs.pop('verbose', False)

0 commit comments

Comments
 (0)