scikit-learn-contrib
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
Lines changed: 1 addition & 1 deletion
diff --git a/build_tools/azure/install.cmd b/build_tools/azure/install.cmd
Lines changed: 1 addition & 1 deletion
diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
Lines changed: 4 additions & 4 deletions
diff --git a/imblearn/base.py b/imblearn/base.py
Lines changed: 3 additions & 2 deletions
diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py
Lines changed: 11 additions & 0 deletions
diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py
Lines changed: 9 additions & 3 deletions
diff --git a/imblearn/metrics/_classification.py b/imblearn/metrics/_classification.py
Lines changed: 36 additions & 38 deletions
diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py
Lines changed: 3 additions & 5 deletions
diff --git a/imblearn/over_sampling/_smote.py b/imblearn/over_sampling/_smote.py
Lines changed: 2 additions & 0 deletions
diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
Lines changed: 2 additions & 6 deletions
@@ -72,7 +72,7 @@ jobs:
 - template: build_tools/azure/posix.yml
   parameters:
     name: macOS
-    vmImage: xcode9-macos10.13
+    vmImage: macOS-10.14
     dependsOn: [linting]
     matrix:
       pylatest_conda_mkl:
 
@@ -30,7 +30,7 @@ if "%COVERAGE%" == "true" (
 python --version
 pip --version
 
-pip install git+https://github.com/scikit-learn/scikit-learn.git
+pip install scikit-learn
 
 @rem Install the build and runtime dependencies of the project.
 python setup.py bdist_wheel bdist_wininst
 
@@ -32,7 +32,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
     fi
 
     make_conda $TO_INSTALL
-    python -m pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
+    python -m pip install scikit-learn
 
     TO_INSTALL=""
 
@@ -75,22 +75,22 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
     python3 -m virtualenv --system-site-packages --python=python3 $VIRTUALENV
     source $VIRTUALENV/bin/activate
     python -m pip install pytest==$PYTEST_VERSION pytest-cov joblib cython
-    python -m pip install git+https://github.com/scikit-learn/scikit-learn.git
+    python -m pip install scikit-learn
 elif [[ "$DISTRIB" == "ubuntu-32" ]]; then
     apt-get update
     apt-get install -y python3-dev python3-scipy libatlas3-base libatlas-base-dev python3-virtualenv git
     python3 -m virtualenv --system-site-packages --python=python3 $VIRTUALENV
     source $VIRTUALENV/bin/activate
     python -m pip install pytest==$PYTEST_VERSION pytest-cov joblib cython
-    python -m pip install git+https://github.com/scikit-learn/scikit-learn.git
+    python -m pip install scikit-learn
 elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
     # Since conda main channel usually lacks behind on the latest releases,
     # we use pypi to test against the latest releases of the dependencies.
     # conda is still used as a convenient way to install Python and pip.
     make_conda "python=$PYTHON_VERSION"
     python -m pip install -U pip
     python -m pip install numpy scipy joblib cython
-    python -m pip install git+https://github.com/scikit-learn/scikit-learn.git
+    python -m pip install scikit-learn
     python -m pip install pytest==$PYTEST_VERSION pytest-cov pytest-xdist
     python -m pip install pandas
 fi
 
@@ -10,7 +10,6 @@
 
 from sklearn.base import BaseEstimator
 from sklearn.preprocessing import label_binarize
-from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import check_classification_targets
 
 from .utils import check_sampling_strategy, check_target_type
@@ -131,7 +130,9 @@ def _check_X_y(self, X, y, accept_sparse=None):
         if accept_sparse is None:
             accept_sparse = ["csr", "csc"]
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
-        X, y = check_X_y(X, y, accept_sparse=accept_sparse)
+        X, y = self._validate_data(
+            X, y, reset=True, accept_sparse=accept_sparse
+        )
         return X, y, binarize_y
 
 
 
@@ -261,3 +261,14 @@ def fit(self, X, y):
         # RandomUnderSampler is not supporting sample_weight. We need to pass
         # None.
         return self._fit(X, y, self.max_samples, sample_weight=None)
+
+    def _more_tags(self):
+        tags = super()._more_tags()
+        tags_key = "_xfail_checks"
+        failing_test = "check_estimators_nan_inf"
+        reason = "Fails because the sampler removed infinity and NaN values"
+        if tags_key in tags:
+            tags[tags_key][failing_test] = reason
+        else:
+            tags[tags_key] = {failing_test: reason}
+        return tags
@@ -25,6 +25,7 @@
 from sklearn.utils import check_array
 from sklearn.utils import check_random_state
 from sklearn.utils import _safe_indexing
+from sklearn.utils.validation import _check_sample_weight
 
 from ..pipeline import make_pipeline
 from ..under_sampling import RandomUnderSampler
@@ -412,10 +413,15 @@ def fit(self, X, y, sample_weight=None):
         """
 
         # Validate or convert input data
-        X = check_array(X, accept_sparse="csc", dtype=DTYPE)
-        y = check_array(y, accept_sparse="csc", ensure_2d=False, dtype=None)
+        if issparse(y):
+            raise ValueError(
+                "sparse multilabel-indicator for y is not supported."
+            )
+        X, y = self._validate_data(X, y, multi_output=True,
+                                   accept_sparse="csc", dtype=DTYPE)
         if sample_weight is not None:
-            sample_weight = check_array(sample_weight, ensure_2d=False)
+            sample_weight = _check_sample_weight(sample_weight, X)
+
         if issparse(X):
             # Pre-sort indices to avoid that each individual tree of the
             # ensemble sorts the indices.
 
@@ -12,10 +12,8 @@
 #          Dariusz Brzezinski
 # License: MIT
 
-import warnings
 import functools
-
-from inspect import getcallargs
+import warnings
 
 import numpy as np
 import scipy as sp
@@ -731,56 +729,58 @@ def make_index_balanced_accuracy(alpha=0.1, squared=True):
     def decorate(scoring_func):
         @functools.wraps(scoring_func)
         def compute_score(*args, **kwargs):
-            # Create the list of tags
-            tags_scoring_func = getcallargs(scoring_func, *args, **kwargs)
+            signature_scoring_func = signature(scoring_func)
+            params_scoring_func = set(signature_scoring_func.parameters.keys())
+
             # check that the scoring function does not need a score
             # and only a prediction
-            if (
-                "y_score" in tags_scoring_func
-                or "y_prob" in tags_scoring_func
-                or "y2" in tags_scoring_func
-            ):
+            prohibited_y_pred = {"y_score", "y_prob", "y2"}
+            if prohibited_y_pred.intersection(params_scoring_func):
                 raise AttributeError(
                     "The function {} has an unsupported"
                     " attribute. Metric with`y_pred` are the"
                     " only supported metrics is the only"
-                    " supported."
+                    " supported.".format(scoring_func.__name__)
                 )
-            # Compute the score from the scoring function
-            _score = scoring_func(*args, **kwargs)
-            # Square if desired
+
+            # Bind the call so defaulted arguments can be read by name.
+            args_scoring_func = signature_scoring_func.bind(*args, **kwargs)
+            args_scoring_func.apply_defaults()
+            _score = scoring_func(
+                *args_scoring_func.args, **args_scoring_func.kwargs
+            )
             if squared:
                 _score = np.power(_score, 2)
-            # Get the signature of the sens/spec function
-            sens_spec_sig = signature(sensitivity_specificity_support)
-            # We need to extract from kwargs only the one needed by the
-            # specificity and specificity
-            params_sens_spec = set(sens_spec_sig._parameters.keys())
-            # Make the intersection between the parameters
-            sel_params = params_sens_spec.intersection(set(tags_scoring_func))
-            # Create a sub dictionary
-            tags_scoring_func = {k: tags_scoring_func[k] for k in sel_params}
-            # Check if the metric is the geometric mean
+
+            # Keep only what sensitivity_specificity_support accepts.
+            signature_sens_spec = signature(sensitivity_specificity_support)
+            params_sens_spec = set(signature_sens_spec.parameters.keys())
+            common_params = params_sens_spec.intersection(
+                set(args_scoring_func.arguments.keys())
+            )
+
+            args_sens_spec = {
+                k: args_scoring_func.arguments[k] for k in common_params
+            }
+
             if scoring_func.__name__ == "geometric_mean_score":
-                if "average" in tags_scoring_func:
-                    if tags_scoring_func["average"] == "multiclass":
-                        tags_scoring_func["average"] = "macro"
-            # We do not support multilabel so the only average supported
-            # is binary
+                if "average" in args_sens_spec:
+                    if args_sens_spec["average"] == "multiclass":
+                        args_sens_spec["average"] = "macro"
             elif (
                 scoring_func.__name__ == "accuracy_score"
                 or scoring_func.__name__ == "jaccard_score"
             ):
-                tags_scoring_func["average"] = "binary"
-            # Create the list of parameters through signature binding
-            tags_sens_spec = sens_spec_sig.bind(**tags_scoring_func)
-            # Call the sens/spec function
-            sen, spe, _ = sensitivity_specificity_support(
-                *tags_sens_spec.args, **tags_sens_spec.kwargs
+                # We do not support multilabel so the only average supported
+                # is binary
+                args_sens_spec["average"] = "binary"
+
+            sensitivity, specificity, _ = sensitivity_specificity_support(
+                **args_sens_spec
             )
-            # Compute the dominance
-            dom = sen - spe
-            return (1.0 + alpha * dom) * _score
+
+            dominance = sensitivity - specificity
+            return (1.0 + alpha * dominance) * _score
 
         return compute_score
 
 
@@ -7,7 +7,6 @@
 from collections import Counter
 
 import numpy as np
-from sklearn.utils import check_array
 from sklearn.utils import check_random_state
 from sklearn.utils import _safe_indexing
 
@@ -75,10 +74,9 @@ def __init__(self, sampling_strategy="auto", random_state=None):
 
     def _check_X_y(self, X, y):
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
-        X = check_array(X, accept_sparse=["csr", "csc"], dtype=None,
-                        force_all_finite=False)
-        y = check_array(
-            y, accept_sparse=["csr", "csc"], dtype=None, ensure_2d=False
+        X, y = self._validate_data(
+            X, y, reset=True, accept_sparse=["csr", "csc"], dtype=None,
+            force_all_finite=False,
         )
         return X, y, binarize_y
 
 
@@ -872,6 +872,8 @@ class SMOTENC(SMOTE):
     Resampled dataset samples per class Counter({0: 900, 1: 900})
     """
 
+    _required_parameters = ["categorical_features"]
+
     def __init__(
         self,
         categorical_features,
 
@@ -456,12 +456,8 @@ def make_pipeline(*steps, **kwargs):
     >>> from sklearn.preprocessing import StandardScaler
     >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
     ... # doctest: +NORMALIZE_WHITESPACE
-    Pipeline(memory=None,
-             steps=[('standardscaler',
-                     StandardScaler(copy=True, with_mean=True, with_std=True)),
-                    ('gaussiannb',
-                     GaussianNB(priors=None, var_smoothing=1e-09))],
-             verbose=False)
+    Pipeline(steps=[('standardscaler', StandardScaler()),
+                    ('gaussiannb', GaussianNB())])
     """
     memory = kwargs.pop("memory", None)
     verbose = kwargs.pop('verbose', False)