Skip to content

Commit 6fe0e41

Browse files
authored
MAINT deprecate random_state and make hash deterministic (#374)
1 parent e00e7f8 commit 6fe0e41

30 files changed

+241
-129
lines changed

doc/under_sampling.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ samples. :class:`NearMiss` implements 3 different types of heuristic which can
112112
be selected with the parameter ``version``::
113113

114114
>>> from imblearn.under_sampling import NearMiss
115-
>>> nm1 = NearMiss(random_state=0, version=1)
115+
>>> nm1 = NearMiss(version=1)
116116
>>> X_resampled_nm1, y_resampled = nm1.fit_sample(X, y)
117117
>>> print(sorted(Counter(y_resampled).items()))
118118
[(0, 64), (1, 64), (2, 64)]
@@ -247,7 +247,7 @@ the sample inspected to keep it in the dataset::
247247
>>> sorted(Counter(y).items())
248248
[(0, 64), (1, 262), (2, 4674)]
249249
>>> from imblearn.under_sampling import EditedNearestNeighbours
250-
>>> enn = EditedNearestNeighbours(random_state=0)
250+
>>> enn = EditedNearestNeighbours()
251251
>>> X_resampled, y_resampled = enn.fit_sample(X, y)
252252
>>> print(sorted(Counter(y_resampled).items()))
253253
[(0, 64), (1, 213), (2, 4568)]
@@ -261,7 +261,7 @@ the decision to keep a given sample or not.
261261
Generally, repeating the algorithm will delete more data::
262262

263263
>>> from imblearn.under_sampling import RepeatedEditedNearestNeighbours
264-
>>> renn = RepeatedEditedNearestNeighbours(random_state=0)
264+
>>> renn = RepeatedEditedNearestNeighbours()
265265
>>> X_resampled, y_resampled = renn.fit_sample(X, y)
266266
>>> print(sorted(Counter(y_resampled).items()))
267267
[(0, 64), (1, 208), (2, 4551)]
@@ -271,7 +271,7 @@ Generally, repeating the algorithm will delete more data::
271271
internal nearest neighbors algorithm is increased at each iteration::
272272

273273
>>> from imblearn.under_sampling import AllKNN
274-
>>> allknn = AllKNN(random_state=0)
274+
>>> allknn = AllKNN()
275275
>>> X_resampled, y_resampled = allknn.fit_sample(X, y)
276276
>>> print(sorted(Counter(y_resampled).items()))
277277
[(0, 64), (1, 220), (2, 4601)]
@@ -338,7 +338,7 @@ between the :class:`EditedNearestNeighbours` and the output of a 3 nearest
338338
neighbors classifier. The class can be used as::
339339

340340
>>> from imblearn.under_sampling import NeighbourhoodCleaningRule
341-
>>> ncr = NeighbourhoodCleaningRule(random_state=0)
341+
>>> ncr = NeighbourhoodCleaningRule()
342342
>>> X_resampled, y_resampled = ncr.fit_sample(X, y)
343343
>>> print(sorted(Counter(y_resampled).items()))
344344
[(0, 64), (1, 234), (2, 4666)]

examples/under-sampling/plot_illustration_tomek_links.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def make_plot_despine(ax):
6767
# samples. If ``ratio='auto'`` only the sample from the majority class will be
6868
# removed. If ``ratio='all'`` both samples will be removed.
6969

70-
sampler = TomekLinks(random_state=0)
70+
sampler = TomekLinks()
7171

7272
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
7373

imblearn/base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,8 @@ class BaseSampler(SamplerMixin):
131131
instead.
132132
"""
133133

134-
def __init__(self, ratio='auto', random_state=None):
134+
def __init__(self, ratio='auto'):
135135
self.ratio = ratio
136-
self.random_state = random_state
137136
self.logger = logging.getLogger(__name__)
138137

139138
def fit(self, X, y):

imblearn/combine/smote_enn.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,7 @@ def _validate_estimator(self):
125125
' Got {} instead.'.format(type(self.enn)))
126126
# Otherwise create a default EditedNearestNeighbours
127127
else:
128-
self.enn_ = EditedNearestNeighbours(ratio='all',
129-
random_state=self.random_state)
128+
self.enn_ = EditedNearestNeighbours(ratio='all')
130129

131130
def fit(self, X, y):
132131
"""Find the class statistics before performing sampling.

imblearn/combine/smote_tomek.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,7 @@ def _validate_estimator(self):
134134
'Got {} instead.'.format(type(self.tomek)))
135135
# Otherwise create a default TomekLinks
136136
else:
137-
self.tomek_ = TomekLinks(ratio='all',
138-
random_state=self.random_state)
137+
self.tomek_ = TomekLinks(ratio='all')
139138

140139
def fit(self, X, y):
141140
"""Find the class statistics before performing sampling.

imblearn/ensemble/balance_cascade.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ def __init__(self,
113113
random_state=None,
114114
n_max_subset=None,
115115
estimator=None):
116-
super(BalanceCascade, self).__init__(ratio=ratio,
117-
random_state=random_state)
116+
super(BalanceCascade, self).__init__(ratio=ratio)
117+
self.random_state = random_state
118118
self.return_indices = return_indices
119119
self.estimator = estimator
120120
self.n_max_subset = n_max_subset

imblearn/ensemble/easy_ensemble.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ def __init__(self,
101101
random_state=None,
102102
replacement=False,
103103
n_subsets=10):
104-
super(EasyEnsemble, self).__init__(ratio=ratio,
105-
random_state=random_state)
104+
super(EasyEnsemble, self).__init__(ratio=ratio)
105+
self.random_state = random_state
106106
self.return_indices = return_indices
107107
self.replacement = replacement
108108
self.n_subsets = n_subsets

imblearn/over_sampling/adasyn.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def __init__(self,
104104
random_state=None,
105105
n_neighbors=5,
106106
n_jobs=1):
107-
super(ADASYN, self).__init__(ratio=ratio, random_state=random_state)
107+
super(ADASYN, self).__init__(ratio=ratio)
108+
self.random_state = random_state
108109
self.n_neighbors = n_neighbors
109110
self.n_jobs = n_jobs
110111

imblearn/over_sampling/base.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
# Christos Aridas
66
# License: MIT
77

8-
from sklearn.utils import check_X_y
9-
108
from ..base import BaseSampler
119

1210

imblearn/over_sampling/random_over_sampler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ class RandomOverSampler(BaseOverSampler):
7676
"""
7777

7878
def __init__(self, ratio='auto', random_state=None):
79-
super(RandomOverSampler, self).__init__(
80-
ratio=ratio, random_state=random_state)
79+
super(RandomOverSampler, self).__init__(ratio=ratio)
80+
self.random_state = random_state
8181

8282
def _sample(self, X, y):
8383
"""Resample the dataset.

0 commit comments

Comments
 (0)