Skip to content

Commit fa91ee1

Browse files
author
Guillaume Lemaitre
committed
Finish the combine method
1 parent 6a3c5de commit fa91ee1

10 files changed

+34
-148
lines changed

imblearn/combine/smote_enn.py

Lines changed: 10 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,11 @@ class SMOTEENN(SamplerMixin):
2222
number of samples in the minority class over the number of
2323
samples in the majority class.
2424
25-
random_state : int or None, optional (default=None)
26-
Seed for random number generation.
25+
random_state : int, RandomState instance or None, optional (default=None)
26+
If int, random_state is the seed used by the random number generator;
27+
If RandomState instance, random_state is the random number generator;
28+
If None, the random number generator is the RandomState instance used
29+
by np.random.
2730
2831
verbose : bool, optional (default=True)
2932
Whether or not to print information about the processing.
@@ -60,15 +63,6 @@ class SMOTEENN(SamplerMixin):
6063
6164
Attributes
6265
----------
63-
ratio : str or float
64-
If 'auto', the ratio will be defined automatically to balance
65-
the dataset. Otherwise, the ratio is defined as the
66-
number of samples in the minority class over the the number of
67-
samples in the majority class.
68-
69-
random_state : int or None
70-
Seed for random number generation.
71-
7266
min_c_ : str or int
7367
The identifier of the minority class.
7468
@@ -100,75 +94,21 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
10094
k=5, m=10, out_step=0.5, kind_smote='regular',
10195
size_ngh=3, kind_enn='all', n_jobs=-1, **kwargs):
10296

103-
"""Initialise the SMOTE ENN object.
104-
105-
Parameters
106-
----------
107-
ratio : str or float, optional (default='auto')
108-
If 'auto', the ratio will be defined automatically to balance
109-
the dataset. Otherwise, the ratio is defined as the
110-
number of samples in the minority class over the the number of
111-
samples in the majority class.
112-
113-
random_state : int or None, optional (default=None)
114-
Seed for random number generation.
115-
116-
verbose : bool, optional (default=True)
117-
Whether or not to print information about the processing.
118-
119-
k : int, optional (default=5)
120-
Number of nearest neighbours to used to construct synthetic
121-
samples.
122-
123-
m : int, optional (default=10)
124-
Number of nearest neighbours to use to determine if a minority
125-
sample is in danger.
126-
127-
out_step : float, optional (default=0.5)
128-
Step size when extrapolating.
129-
130-
kind_smote : str, optional (default='regular')
131-
The type of SMOTE algorithm to use one of the following
132-
options: 'regular', 'borderline1', 'borderline2', 'svm'.
133-
134-
size_ngh : int, optional (default=3)
135-
Size of the neighbourhood to consider to compute the average
136-
distance to the minority point samples.
137-
138-
kind_sel : str, optional (default='all')
139-
Strategy to use in order to exclude samples.
140-
141-
- If 'all', all neighbours will have to agree with the samples of
142-
interest to not be excluded.
143-
- If 'mode', the majority vote of the neighbours will be used in
144-
order to exclude a sample.
145-
146-
n_jobs : int, optional (default=-1)
147-
The number of threads to open if possible.
148-
149-
Returns
150-
-------
151-
None
152-
153-
"""
154-
super(SMOTEENN, self).__init__(ratio=ratio, random_state=random_state,
97+
super(SMOTEENN, self).__init__(ratio=ratio,
15598
verbose=verbose)
156-
99+
self.random_state = random_state
157100
self.k = k
158101
self.m = m
159102
self.out_step = out_step
160103
self.kind_smote = kind_smote
104+
self.size_ngh = size_ngh
105+
self.kind_enn = kind_enn
161106
self.n_jobs = n_jobs
162107
self.kwargs = kwargs
163-
164108
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
165109
verbose=self.verbose, k=self.k, m=self.m,
166110
out_step=self.out_step, kind=self.kind_smote,
167111
n_jobs=self.n_jobs, **self.kwargs)
168-
169-
self.size_ngh = size_ngh
170-
self.kind_enn = kind_enn
171-
172112
self.enn = EditedNearestNeighbours(random_state=self.random_state,
173113
verbose=self.verbose,
174114
size_ngh=self.size_ngh,
@@ -192,8 +132,6 @@ def fit(self, X, y):
192132
Return self.
193133
194134
"""
195-
# Check the consistency of X and y
196-
X, y = check_X_y(X, y)
197135

198136
super(SMOTEENN, self).fit(X, y)
199137

@@ -202,7 +140,7 @@ def fit(self, X, y):
202140

203141
return self
204142

205-
def sample(self, X, y):
143+
def _sample(self, X, y):
206144
"""Resample the dataset.
207145
208146
Parameters
@@ -222,10 +160,6 @@ def sample(self, X, y):
222160
The corresponding label of `X_resampled`
223161
224162
"""
225-
# Check the consistency of X and y
226-
X, y = check_X_y(X, y)
227-
228-
super(SMOTEENN, self).sample(X, y)
229163

230164
# Transform using SMOTE
231165
X, y = self.sm.sample(X, y)

imblearn/combine/smote_tomek.py

Lines changed: 8 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,11 @@ class SMOTETomek(SamplerMixin):
2323
number of samples in the minority class over the number of
2424
samples in the majority class.
2525
26-
random_state : int or None, optional (default=None)
27-
Seed for random number generation.
26+
random_state : int, RandomState instance or None, optional (default=None)
27+
If int, random_state is the seed used by the random number generator;
28+
If RandomState instance, random_state is the random number generator;
29+
If None, the random number generator is the RandomState instance used
30+
by np.random.
2831
2932
verbose : bool, optional (default=True)
3033
Whether or not to print information about the processing.
@@ -61,15 +64,6 @@ class SMOTETomek(SamplerMixin):
6164
6265
Attributes
6366
----------
64-
ratio : str or float
65-
If 'auto', the ratio will be defined automatically to balance
66-
the dataset. Otherwise, the ratio is defined as the
67-
number of samples in the minority class over the the number of
68-
samples in the majority class.
69-
70-
random_state : int or None
71-
Seed for random number generation.
72-
7367
min_c_ : str or int
7468
The identifier of the minority class.
7569
@@ -99,62 +93,18 @@ class SMOTETomek(SamplerMixin):
9993
def __init__(self, ratio='auto', random_state=None, verbose=True,
10094
k=5, m=10, out_step=0.5, kind_smote='regular',
10195
n_jobs=-1, **kwargs):
102-
103-
"""Initialise the SMOTE Tomek links object.
104-
105-
Parameters
106-
----------
107-
ratio : str or float, optional (default='auto')
108-
If 'auto', the ratio will be defined automatically to balance
109-
the dataset. Otherwise, the ratio is defined as the
110-
number of samples in the minority class over the the number of
111-
samples in the majority class.
112-
113-
random_state : int or None, optional (default=None)
114-
Seed for random number generation.
115-
116-
verbose : bool, optional (default=True)
117-
Whether or not to print information about the processing.
118-
119-
k : int, optional (default=5)
120-
Number of nearest neighbours to used to construct synthetic
121-
samples.
122-
123-
m : int, optional (default=10)
124-
Number of nearest neighbours to use to determine if a minority
125-
sample is in danger.
126-
127-
out_step : float, optional (default=0.5)
128-
Step size when extrapolating.
129-
130-
kind_smote : str, optional (default='regular')
131-
The type of SMOTE algorithm to use one of the following
132-
options: 'regular', 'borderline1', 'borderline2', 'svm'.
133-
134-
n_jobs : int, optional (default=-1)
135-
Number of threads to run the algorithm when it is possible.
136-
137-
Returns
138-
-------
139-
None
140-
141-
"""
142-
super(SMOTETomek, self).__init__(ratio=ratio,
143-
random_state=random_state,
144-
verbose=verbose)
145-
96+
super(SMOTETomek, self).__init__(ratio=ratio, verbose=verbose)
97+
self.random_state = random_state
14698
self.k = k
14799
self.m = m
148100
self.out_step = out_step
149101
self.kind_smote = kind_smote
150102
self.n_jobs = n_jobs
151103
self.kwargs = kwargs
152-
153104
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
154105
verbose=self.verbose, k=self.k, m=self.m,
155106
out_step=self.out_step, kind=self.kind_smote,
156107
n_jobs=self.n_jobs, **self.kwargs)
157-
158108
self.tomek = TomekLinks(random_state=self.random_state,
159109
verbose=self.verbose)
160110

@@ -175,8 +125,6 @@ def fit(self, X, y):
175125
Return self.
176126
177127
"""
178-
# Check the consistency of X and y
179-
X, y = check_X_y(X, y)
180128

181129
super(SMOTETomek, self).fit(X, y)
182130

@@ -185,7 +133,7 @@ def fit(self, X, y):
185133

186134
return self
187135

188-
def sample(self, X, y):
136+
def _sample(self, X, y):
189137
"""Resample the dataset.
190138
191139
Parameters
@@ -205,10 +153,6 @@ def sample(self, X, y):
205153
The corresponding label of `X_resampled`
206154
207155
"""
208-
# Check the consistency of X and y
209-
X, y = check_X_y(X, y)
210-
211-
super(SMOTETomek, self).sample(X, y)
212156

213157
# Transform using SMOTE
214158
X, y = self.sm.sample(X, y)
960 Bytes
Binary file not shown.
480 Bytes
Binary file not shown.
48 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

imblearn/combine/tests/test_smote_enn.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,23 @@ def test_senn_bad_ratio():
3333

3434
# Define a negative ratio
3535
ratio = -1.0
36-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
36+
smote = SMOTEENN(ratio=ratio)
37+
assert_raises(ValueError, smote.fit, X, Y)
3738

3839
# Define a ratio greater than 1
3940
ratio = 100.0
40-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
41+
smote = SMOTEENN(ratio=ratio)
42+
assert_raises(ValueError, smote.fit, X, Y)
4143

4244
# Define ratio as an unknown string
4345
ratio = 'rnd'
44-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
46+
smote = SMOTEENN(ratio=ratio)
47+
assert_raises(ValueError, smote.fit, X, Y)
4548

4649
# Define ratio as a list which is not supported
4750
ratio = [.5, .5]
48-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
51+
smote = SMOTEENN(ratio=ratio)
52+
assert_raises(ValueError, smote.fit, X, Y)
4953

5054

5155
def test_smote_fit_single_class():

imblearn/combine/tests/test_smote_tomek.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,23 @@ def test_smote_bad_ratio():
3333

3434
# Define a negative ratio
3535
ratio = -1.0
36-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
36+
smote = SMOTETomek(ratio=ratio)
37+
assert_raises(ValueError, smote.fit, X, Y)
3738

3839
# Define a ratio greater than 1
3940
ratio = 100.0
40-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
41+
smote = SMOTETomek(ratio=ratio)
42+
assert_raises(ValueError, smote.fit, X, Y)
4143

4244
# Define ratio as an unknown string
4345
ratio = 'rnd'
44-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
46+
smote = SMOTETomek(ratio=ratio)
47+
assert_raises(ValueError, smote.fit, X, Y)
4548

4649
# Define ratio as a list which is not supported
4750
ratio = [.5, .5]
48-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
51+
smote = SMOTETomek(ratio=ratio)
52+
assert_raises(ValueError, smote.fit, X, Y)
4953

5054

5155
def test_smote_fit_single_class():

0 commit comments

Comments
 (0)