Skip to content

Commit ed88516

Browse files
author
Guillaume Lemaitre
committed
Modify verbose for logging messages
1 parent 6482830 commit ed88516

33 files changed

+133
-278
lines changed

imblearn/base.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class SamplerMixin(six.with_metaclass(ABCMeta, BaseEstimator)):
2929

3030
_estimator_type = "sampler"
3131

32-
def __init__(self, ratio='auto', verbose=True):
32+
def __init__(self, ratio='auto'):
3333
"""Initialize this object and its instance variables.
3434
3535
Parameters
@@ -40,20 +40,13 @@ def __init__(self, ratio='auto', verbose=True):
4040
of samples in the minority class over the number of samples
4141
in the majority class.
4242
43-
random_state : int or None, optional (default=None)
44-
Seed for random number generation.
45-
46-
verbose : bool, optional (default=True)
47-
Boolean to either or not print information about the processing
48-
4943
Returns
5044
-------
5145
None
5246
5347
"""
5448

5549
self.ratio = ratio
56-
self.verbose = verbose
5750
self.logger = logging.getLogger(__name__)
5851

5952
def fit(self, X, y):
@@ -85,8 +78,7 @@ def fit(self, X, y):
8578
if hasattr(self, 'ratio'):
8679
self._validate_ratio()
8780

88-
if self.verbose:
89-
print("Determining classes statistics... ", end="")
81+
self.logger.info('Compute classes statistics ...')
9082

9183
# Get all the unique elements in the target array
9284
uniques = np.unique(y)
@@ -110,9 +102,8 @@ def fit(self, X, y):
110102
self.min_c_ = min(self.stats_c_, key=self.stats_c_.get)
111103
self.maj_c_ = max(self.stats_c_, key=self.stats_c_.get)
112104

113-
if self.verbose:
114-
print('{} classes detected: {}'.format(uniques.size,
115-
self.stats_c_))
105+
self.logger.info('{} classes detected: {}'.format(uniques.size,
106+
self.stats_c_))
116107

117108
# Check if the ratio provided at initialisation makes sense
118109
if isinstance(self.ratio, float):

imblearn/combine/smote_enn.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@ class SMOTEENN(SamplerMixin):
2626
If None, the random number generator is the RandomState instance used
2727
by np.random.
2828
29-
verbose : bool, optional (default=True)
30-
Whether or not to print information about the processing.
31-
3229
k : int, optional (default=5)
3330
Number of nearest neighbours to use to construct synthetic
3431
samples.
@@ -88,12 +85,11 @@ class SMOTEENN(SamplerMixin):
8885
8986
"""
9087

91-
def __init__(self, ratio='auto', random_state=None, verbose=True,
88+
def __init__(self, ratio='auto', random_state=None,
9289
k=5, m=10, out_step=0.5, kind_smote='regular',
9390
size_ngh=3, kind_enn='all', n_jobs=-1, **kwargs):
9491

95-
super(SMOTEENN, self).__init__(ratio=ratio,
96-
verbose=verbose)
92+
super(SMOTEENN, self).__init__(ratio=ratio)
9793
self.random_state = random_state
9894
self.k = k
9995
self.m = m
@@ -104,11 +100,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
104100
self.n_jobs = n_jobs
105101
self.kwargs = kwargs
106102
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
107-
verbose=self.verbose, k=self.k, m=self.m,
108-
out_step=self.out_step, kind=self.kind_smote,
109-
n_jobs=self.n_jobs, **self.kwargs)
103+
k=self.k, m=self.m, out_step=self.out_step,
104+
kind=self.kind_smote, n_jobs=self.n_jobs,
105+
**self.kwargs)
110106
self.enn = EditedNearestNeighbours(random_state=self.random_state,
111-
verbose=self.verbose,
112107
size_ngh=self.size_ngh,
113108
kind_sel=self.kind_enn,
114109
n_jobs=self.n_jobs)

imblearn/combine/smote_tomek.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@ class SMOTETomek(SamplerMixin):
2727
If None, the random number generator is the RandomState instance used
2828
by np.random.
2929
30-
verbose : bool, optional (default=True)
31-
Whether or not to print information about the processing.
32-
3330
k : int, optional (default=5)
3431
Number of nearest neighbours to use to construct synthetic
3532
samples.
@@ -88,10 +85,10 @@ class SMOTETomek(SamplerMixin):
8885
8986
"""
9087

91-
def __init__(self, ratio='auto', random_state=None, verbose=True,
88+
def __init__(self, ratio='auto', random_state=None,
9289
k=5, m=10, out_step=0.5, kind_smote='regular',
9390
n_jobs=-1, **kwargs):
94-
super(SMOTETomek, self).__init__(ratio=ratio, verbose=verbose)
91+
super(SMOTETomek, self).__init__(ratio=ratio)
9592
self.random_state = random_state
9693
self.k = k
9794
self.m = m
@@ -100,11 +97,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
10097
self.n_jobs = n_jobs
10198
self.kwargs = kwargs
10299
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
103-
verbose=self.verbose, k=self.k, m=self.m,
104-
out_step=self.out_step, kind=self.kind_smote,
105-
n_jobs=self.n_jobs, **self.kwargs)
106-
self.tomek = TomekLinks(random_state=self.random_state,
107-
verbose=self.verbose)
100+
k=self.k, m=self.m, out_step=self.out_step,
101+
kind=self.kind_smote, n_jobs=self.n_jobs,
102+
**self.kwargs)
103+
self.tomek = TomekLinks(random_state=self.random_state)
108104

109105
def fit(self, X, y):
110106
"""Find the classes statistics before performing sampling.

imblearn/ensemble/balance_cascade.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,6 @@ class BalanceCascade(SamplerMixin):
3737
If None, the random number generator is the RandomState instance used
3838
by np.random.
3939
40-
verbose : bool, optional (default=True)
41-
Whether or not to print information about the processing.
42-
4340
n_max_subset : int or None, optional (default=None)
4441
Maximum number of subsets to generate. By default, all data from
4542
the training will be selected that could lead to a large number of
@@ -87,10 +84,9 @@ class BalanceCascade(SamplerMixin):
8784
8885
"""
8986
def __init__(self, ratio='auto', return_indices=False, random_state=None,
90-
verbose=True, n_max_subset=None, classifier='knn',
91-
bootstrap=True, **kwargs):
92-
super(BalanceCascade, self).__init__(ratio=ratio,
93-
verbose=verbose)
87+
n_max_subset=None, classifier='knn', bootstrap=True,
88+
**kwargs):
89+
super(BalanceCascade, self).__init__(ratio=ratio)
9490
self.return_indices = return_indices
9591
self.random_state = random_state
9692
self.classifier = classifier
@@ -242,17 +238,17 @@ def _sample(self, X, y):
242238
# Find the misclassified index to keep them for the next round
243239
idx_mis_class = idx_sel_from_maj[np.nonzero(pred_label !=
244240
N_y[idx_sel_from_maj])]
245-
if self.verbose:
246-
print("Elements misclassified: {}".format(idx_mis_class))
241+
self.logger.debug('Elements misclassified: {}'.format(
242+
idx_mis_class))
243+
247244
# Count how many random elements will be selected
248245
if self.ratio == 'auto':
249246
num_samples = self.stats_c_[self.min_c_]
250247
else:
251248
num_samples = int(self.stats_c_[self.min_c_] / self.ratio)
252249
num_samples -= idx_mis_class.size
253250

254-
if self.verbose:
255-
print("Creation of the subset #{}".format(n_subsets))
251+
self.logger.debug('Creation of the subset #{}'.format(n_subsets))
256252

257253
# We found a new subset, increase the counter
258254
n_subsets += 1
@@ -279,13 +275,14 @@ def _sample(self, X, y):
279275
idx_sel_from_maj),
280276
axis=0))
281277

282-
if self.verbose:
283-
print("Creation of the subset #" + str(n_subsets))
278+
self.logger.debug('Creation of the subset #{}'.format(
279+
n_subsets))
280+
281+
# We found a new subset, increase the counter
282+
n_subsets += 1
284283

285-
# We found a new subset, increase the counter
286-
n_subsets += 1
287-
if self.verbose:
288-
print('The number of subset achieved their maximum')
284+
self.logger.debug('The number of subset reached is'
285+
' maximum.')
289286

290287
# Also check that we will have enough samples to extract at the
291288
# next round
@@ -307,14 +304,14 @@ def _sample(self, X, y):
307304
idx_under.append(np.concatenate((idx_min,
308305
idx_sel_from_maj),
309306
axis=0))
310-
if self.verbose:
311-
print("Creation of the subset #" + str(n_subsets))
307+
self.logger.debug('Creation of the subset #{}'.format(
308+
n_subsets))
312309

313310
# We found a new subset, increase the counter
314311
n_subsets += 1
315312

316-
if self.verbose:
317-
print('Not enough samples to continue creating subsets')
313+
self.logger.debug('Not enough samples to continue creating'
314+
' subsets.')
318315

319316
if self.return_indices:
320317
return (np.array(X_resampled), np.array(y_resampled),

imblearn/ensemble/easy_ensemble.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ class EasyEnsemble(SamplerMixin):
3131
If None, the random number generator is the RandomState instance used
3232
by np.random.
3333
34-
verbose : bool, optional (default=True)
35-
Whether or not to print information about the processing.
36-
3734
replacement : bool, optional (default=False)
3835
Whether or not to sample randomly with replacement or not.
3936
@@ -68,10 +65,9 @@ class EasyEnsemble(SamplerMixin):
6865
6966
"""
7067

71-
def __init__(self, ratio='auto', return_indices=False, verbose=True,
68+
def __init__(self, ratio='auto', return_indices=False,
7269
random_state=None, replacement=False, n_subsets=10):
73-
super(EasyEnsemble, self).__init__(ratio=ratio,
74-
verbose=verbose)
70+
super(EasyEnsemble, self).__init__(ratio=ratio)
7571
self.return_indices = return_indices
7672
self.random_state = random_state
7773
self.replacement = replacement
@@ -108,14 +104,12 @@ def _sample(self, X, y):
108104
idx_under = []
109105

110106
for s in range(self.n_subsets):
111-
if self.verbose:
112-
print("Creation of the set #{}".format(s))
107+
self.logger.debug('Creation of the set #{}'.format(s))
113108

114109
# Create the object for random under-sampling
115110
rus = RandomUnderSampler(ratio=self.ratio,
116111
return_indices=self.return_indices,
117112
random_state=self.random_state,
118-
verbose=self.verbose,
119113
replacement=self.replacement)
120114
if self.return_indices:
121115
sel_x, sel_y, sel_idx = rus.fit_sample(X, y)

imblearn/ensemble/tests/test_balance_cascade.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,12 @@ def test_bc_init():
5757

5858
# Define a ratio
5959
ratio = 1.
60-
verbose = True
61-
bc = BalanceCascade(ratio=ratio, random_state=RND_SEED, verbose=verbose)
60+
bc = BalanceCascade(ratio=ratio, random_state=RND_SEED)
6261

6362
assert_equal(bc.ratio, ratio)
6463
assert_equal(bc.bootstrap, True)
6564
assert_equal(bc.n_max_subset, None)
6665
assert_equal(bc.random_state, RND_SEED)
67-
assert_equal(bc.verbose, verbose)
6866

6967

7068
def test_bc_fit_single_class():

imblearn/ensemble/tests/test_easy_ensemble.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,12 @@ def test_ee_init():
5757

5858
# Define a ratio
5959
ratio = 1.
60-
verbose = True
61-
ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED, verbose=verbose)
60+
ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED)
6261

6362
assert_equal(ee.ratio, ratio)
6463
assert_equal(ee.replacement, False)
6564
assert_equal(ee.n_subsets, 10)
6665
assert_equal(ee.random_state, RND_SEED)
67-
assert_equal(ee.verbose, verbose)
6866

6967

7068
def test_ee_fit_single_class():

imblearn/over_sampling/adasyn.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,6 @@ class ADASYN(SamplerMixin):
3333
If None, the random number generator is the RandomState instance used
3434
by np.random.
3535
36-
verbose : bool, optional (default=True)
37-
Whether or not to print information about the processing.
38-
3936
k : int, optional (default=5)
4037
Number of nearest neighbours to use to construct synthetic samples.
4138
@@ -75,11 +72,9 @@ class ADASYN(SamplerMixin):
7572
def __init__(self,
7673
ratio='auto',
7774
random_state=None,
78-
verbose=True,
7975
k=5,
8076
n_jobs=1):
81-
super(ADASYN, self).__init__(ratio=ratio,
82-
verbose=verbose)
77+
super(ADASYN, self).__init__(ratio=ratio)
8378
self.random_state = random_state
8479
self.k = k
8580
self.n_jobs = n_jobs
@@ -125,8 +120,8 @@ def _sample(self, X, y):
125120
X_min = X[y == self.min_c_]
126121

127122
# Log the nearest neighbours search at debug level
128-
if self.verbose:
129-
print('Finding the {} nearest neighbours...'.format(self.k))
123+
self.logger.debug('Finding the {} nearest neighbours...'.format(
124+
self.k))
130125

131126
# Look for k-th nearest neighbours, excluding, of course, the
132127
# point itself.
@@ -156,7 +151,7 @@ def _sample(self, X, y):
156151
X_resampled = np.vstack((X_resampled, x_gen))
157152
y_resampled = np.hstack((y_resampled, self.min_c_))
158153

159-
if self.verbose:
160-
print("Over-sampling performed: {}".format(Counter(y_resampled)))
154+
self.logger.info('Over-sampling performed: {}'.format(Counter(
155+
y_resampled)))
161156

162157
return X_resampled, y_resampled

imblearn/over_sampling/random_over_sampler.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@ class RandomOverSampler(SamplerMixin):
2626
of samples in the minority class over the number of samples
2727
in the majority class.
2828
29-
verbose : bool, optional (default=True)
30-
Whether or not to print information about the processing.
31-
3229
random_state : int, RandomState instance or None, optional (default=None)
3330
If int, random_state is the seed used by the random number generator;
3431
If RandomState instance, random_state is the random number generator;
@@ -57,11 +54,9 @@ class RandomOverSampler(SamplerMixin):
5754

5855
def __init__(self,
5956
ratio='auto',
60-
verbose=True,
6157
random_state=None):
6258

63-
super(RandomOverSampler, self).__init__(ratio=ratio,
64-
verbose=verbose)
59+
super(RandomOverSampler, self).__init__(ratio=ratio)
6560
self.random_state = random_state
6661

6762
def _sample(self, X, y):
@@ -119,7 +114,7 @@ def _sample(self, X, y):
119114
y[y == key],
120115
y[y == key][indx]), axis=0)
121116

122-
if self.verbose:
123-
print("Over-sampling performed: {}".format(Counter(y_resampled)))
117+
self.logger.info('Over-sampling performed: {}'.format(Counter(
118+
y_resampled)))
124119

125120
return X_resampled, y_resampled

0 commit comments

Comments
 (0)