scikit-learn-contrib
diff --git a/‎doc/api.rst
Lines changed: 0 additions & 1 deletion b/‎doc/api.rst
Lines changed: 0 additions & 1 deletion
diff --git a/‎doc/combine.rst
Lines changed: 2 additions & 2 deletions b/‎doc/combine.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/ensemble.rst
Lines changed: 2 additions & 2 deletions b/‎doc/ensemble.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/introduction.rst
Lines changed: 2 additions & 6 deletions b/‎doc/introduction.rst
Lines changed: 2 additions & 6 deletions
diff --git a/‎doc/miscellaneous.rst
Lines changed: 1 addition & 1 deletion b/‎doc/miscellaneous.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/over_sampling.rst
Lines changed: 5 additions & 5 deletions b/‎doc/over_sampling.rst
Lines changed: 5 additions & 5 deletions
diff --git a/‎doc/under_sampling.rst
Lines changed: 12 additions & 12 deletions b/‎doc/under_sampling.rst
Lines changed: 12 additions & 12 deletions
diff --git a/‎doc/whats_new/v0.0.4.rst
Lines changed: 5 additions & 0 deletions b/‎doc/whats_new/v0.0.4.rst
Lines changed: 5 additions & 0 deletions
diff --git a/‎examples/applications/plot_over_sampling_benchmark_lfw.py
Lines changed: 1 addition & 1 deletion b/‎examples/applications/plot_over_sampling_benchmark_lfw.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/applications/porto_seguro_keras_under_sampling.py
Lines changed: 1 addition & 1 deletion b/‎examples/applications/porto_seguro_keras_under_sampling.py
Lines changed: 1 addition & 1 deletion
@@ -247,4 +247,3 @@ Imbalanced-learn provides some fast-prototyping tools.
    utils.check_neighbors_object
    utils.check_ratio
    utils.check_sampling_strategy
-   utils.hash_X_y
@@ -33,12 +33,12 @@ to their former samplers::
   [(0, 64), (1, 262), (2, 4674)]
   >>> from imblearn.combine import SMOTEENN
   >>> smote_enn = SMOTEENN(random_state=0)
-  >>> X_resampled, y_resampled = smote_enn.fit_sample(X, y)
+  >>> X_resampled, y_resampled = smote_enn.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 4060), (1, 4381), (2, 3502)]
   >>> from imblearn.combine import SMOTETomek
   >>> smote_tomek = SMOTETomek(random_state=0)
-  >>> X_resampled, y_resampled = smote_tomek.fit_sample(X, y)
+  >>> X_resampled, y_resampled = smote_tomek.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 4499), (1, 4566), (2, 4413)]
 
 
@@ -33,7 +33,7 @@ under-sampling the original set::
   [(0, 64), (1, 262), (2, 4674)]
   >>> from imblearn.ensemble import EasyEnsemble
   >>> ee = EasyEnsemble(random_state=0, n_subsets=10)
-  >>> X_resampled, y_resampled = ee.fit_sample(X, y)
+  >>> X_resampled, y_resampled = ee.fit_resample(X, y)
   >>> print(X_resampled.shape)
   (10, 192, 2)
   >>> print(sorted(Counter(y_resampled[0]).items()))
@@ -55,7 +55,7 @@ parameter ``n_max_subset`` and an additional bootstrapping can be activated with
   >>> bc = BalanceCascade(random_state=0,
   ...                     estimator=LogisticRegression(random_state=0),
   ...                     n_max_subset=4)
-  >>> X_resampled, y_resampled = bc.fit_sample(X, y)
+  >>> X_resampled, y_resampled = bc.fit_resample(X, y)
   >>> print(X_resampled.shape)
   (4, 192, 2)
   >>> print(sorted(Counter(y_resampled[0]).items()))
 
@@ -18,15 +18,11 @@ and adding a sampling functionality through the ``sample`` method:
 
       estimator = obj.fit(data, targets)
 
-:Sampler:
+:Resampler:
 
     To resample a data set, each sampler implements::
 
-      data_resampled, targets_resampled = obj.sample(data, targets)
-
-    Fitting and sampling can also be done in one step::
-
-      data_resampled, targets_resampled = obj.fit_sample(data, targets)
+      data_resampled, targets_resampled = obj.fit_resample(data, targets)
 
 Imbalanced-learn samplers accept the same inputs as scikit-learn:
 
 
@@ -28,7 +28,7 @@ to retain the 10 first elements of the array ``X`` and ``y``::
   >>> def func(X, y):
   ...   return X[:10], y[:10]
   >>> sampler = FunctionSampler(func=func)
-  >>> X_res, y_res = sampler.fit_sample(X, y)
+  >>> X_res, y_res = sampler.fit_resample(X, y)
   >>> np.all(X_res == X[:10])
   True
   >>> np.all(y_res == y[:10])
 
@@ -27,7 +27,7 @@ randomly sampling with replacement the current available samples. The
    ...                            class_sep=0.8, random_state=0)
    >>> from imblearn.over_sampling import RandomOverSampler
    >>> ros = RandomOverSampler(random_state=0)
-   >>> X_resampled, y_resampled = ros.fit_sample(X, y)
+   >>> X_resampled, y_resampled = ros.fit_resample(X, y)
    >>> from collections import Counter
    >>> print(sorted(Counter(y_resampled).items()))
    [(0, 4674), (1, 4674), (2, 4674)]
@@ -59,7 +59,7 @@ In addition, :class:`RandomOverSampler` allows sampling heterogeneous data
   >>> X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
   ...                     dtype=np.object)
   >>> y_hetero = np.array([0, 0, 1])
-  >>> X_resampled, y_resampled = ros.fit_sample(X_hetero, y_hetero)
+  >>> X_resampled, y_resampled = ros.fit_resample(X_hetero, y_hetero)
   >>> print(X_resampled)
   [['xxx' 1 1.0]
    ['yyy' 2 2.0]
@@ -82,11 +82,11 @@ to over-sample minority classes: (i) the Synthetic Minority Oversampling Techniq
 can be used in the same manner::
 
   >>> from imblearn.over_sampling import SMOTE, ADASYN
-  >>> X_resampled, y_resampled = SMOTE().fit_sample(X, y)
+  >>> X_resampled, y_resampled = SMOTE().fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 4674), (1, 4674), (2, 4674)]
   >>> clf_smote = LinearSVC().fit(X_resampled, y_resampled)
-  >>> X_resampled, y_resampled = ADASYN().fit_sample(X, y)
+  >>> X_resampled, y_resampled = ADASYN().fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 4673), (1, 4662), (2, 4674)]
   >>> clf_adasyn = LinearSVC().fit(X_resampled, y_resampled)
@@ -147,7 +147,7 @@ The :class:`BorderlineSMOTE` and :class:`SVMSMOTE` offer some variant of the SMO
 algorithm::
 
   >>> from imblearn.over_sampling import BorderlineSMOTE
-  >>> X_resampled, y_resampled = BorderlineSMOTE().fit_sample(X, y)
+  >>> X_resampled, y_resampled = BorderlineSMOTE().fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 4674), (1, 4674), (2, 4674)]
 
 
@@ -32,7 +32,7 @@ K-means method instead of the original samples::
   [(0, 64), (1, 262), (2, 4674)]
   >>> from imblearn.under_sampling import ClusterCentroids
   >>> cc = ClusterCentroids(random_state=0)
-  >>> X_resampled, y_resampled = cc.fit_sample(X, y)
+  >>> X_resampled, y_resampled = cc.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 64), (2, 64)]
 
@@ -82,7 +82,7 @@ randomly selecting a subset of data for the targeted classes::
 
   >>> from imblearn.under_sampling import RandomUnderSampler
   >>> rus = RandomUnderSampler(random_state=0)
-  >>> X_resampled, y_resampled = rus.fit_sample(X, y)
+  >>> X_resampled, y_resampled = rus.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 64), (2, 64)]
 
@@ -99,7 +99,7 @@ by considering independently each targeted class::
   >>> print(np.vstack({tuple(row) for row in X_resampled}).shape)
   (192, 2)
   >>> rus = RandomUnderSampler(random_state=0, replacement=True)
-  >>> X_resampled, y_resampled = rus.fit_sample(X, y)
+  >>> X_resampled, y_resampled = rus.fit_resample(X, y)
   >>> print(np.vstack({tuple(row) for row in X_resampled}).shape)
   (181, 2)
 
@@ -109,7 +109,7 @@ In addition, :class:`RandomUnderSampler` allows sampling heterogeneous data
   >>> X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
   ...                     dtype=np.object)
   >>> y_hetero = np.array([0, 0, 1])
-  >>> X_resampled, y_resampled = rus.fit_sample(X_hetero, y_hetero)
+  >>> X_resampled, y_resampled = rus.fit_resample(X_hetero, y_hetero)
   >>> print(X_resampled)
   [['xxx' 1 1.0]
    ['zzz' 3 3.0]]
@@ -126,7 +126,7 @@ be selected with the parameter ``version``::
 
   >>> from imblearn.under_sampling import NearMiss
   >>> nm1 = NearMiss(version=1)
-  >>> X_resampled_nm1, y_resampled = nm1.fit_sample(X, y)
+  >>> X_resampled_nm1, y_resampled = nm1.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 64), (2, 64)]
 
@@ -261,7 +261,7 @@ the sample inspected to keep it in the dataset::
   [(0, 64), (1, 262), (2, 4674)]
   >>> from imblearn.under_sampling import EditedNearestNeighbours
   >>> enn = EditedNearestNeighbours()
-  >>> X_resampled, y_resampled = enn.fit_sample(X, y)
+  >>> X_resampled, y_resampled = enn.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 213), (2, 4568)]
 
@@ -275,7 +275,7 @@ Generally, repeating the algorithm will delete more data::
 
    >>> from imblearn.under_sampling import RepeatedEditedNearestNeighbours
    >>> renn = RepeatedEditedNearestNeighbours()
-   >>> X_resampled, y_resampled = renn.fit_sample(X, y)
+   >>> X_resampled, y_resampled = renn.fit_resample(X, y)
    >>> print(sorted(Counter(y_resampled).items()))
    [(0, 64), (1, 208), (2, 4551)]
 
@@ -285,7 +285,7 @@ internal nearest neighbors algorithm is increased at each iteration::
 
   >>> from imblearn.under_sampling import AllKNN
   >>> allknn = AllKNN()
-  >>> X_resampled, y_resampled = allknn.fit_sample(X, y)
+  >>> X_resampled, y_resampled = allknn.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 220), (2, 4601)]
 
@@ -323,7 +323,7 @@ The :class:`CondensedNearestNeighbour` can be used in the following manner::
 
   >>> from imblearn.under_sampling import CondensedNearestNeighbour
   >>> cnn = CondensedNearestNeighbour(random_state=0)
-  >>> X_resampled, y_resampled = cnn.fit_sample(X, y)
+  >>> X_resampled, y_resampled = cnn.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 24), (2, 115)]
 
@@ -338,7 +338,7 @@ used as::
 
   >>> from imblearn.under_sampling import OneSidedSelection
   >>> oss = OneSidedSelection(random_state=0)
-  >>> X_resampled, y_resampled = oss.fit_sample(X, y)
+  >>> X_resampled, y_resampled = oss.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 174), (2, 4403)]
 
@@ -352,7 +352,7 @@ neighbors classifier. The class can be used as::
 
   >>> from imblearn.under_sampling import NeighbourhoodCleaningRule
   >>> ncr = NeighbourhoodCleaningRule()
-  >>> X_resampled, y_resampled = ncr.fit_sample(X, y)
+  >>> X_resampled, y_resampled = ncr.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 234), (2, 4666)]
 
@@ -380,7 +380,7 @@ removed. The class can be used as::
   >>> from imblearn.under_sampling import InstanceHardnessThreshold
   >>> iht = InstanceHardnessThreshold(random_state=0,
   ...                                 estimator=LogisticRegression())
-  >>> X_resampled, y_resampled = iht.fit_sample(X, y)
+  >>> X_resampled, y_resampled = iht.fit_resample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 64), (1, 64), (2, 64)]
 
 
@@ -18,6 +18,11 @@ API
 - Enable to use a ``list`` for the cleaning methods to specify the class to
   sample. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- Replace ``fit_sample`` with ``fit_resample``. An alias is still available for
+  backward compatibility. In addition, ``sample`` has been removed to avoid
+  resampling on a different set of data.
+  :issue:`462` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 New features
 ............
 
 
@@ -39,7 +39,7 @@ def sample(self, X, y):
     def fit(self, X, y):
         return self
 
-    def fit_sample(self, X, y):
+    def fit_resample(self, X, y):
         return self.sample(X, y)
 
 
 
@@ -49,7 +49,7 @@
 ###############################################################################
 
 from sklearn.compose import ColumnTransformer
-from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.preprocessing import StandardScaler
 from sklearn.preprocessing import FunctionTransformer