Skip to content

Commit fbeb485

Browse files
author
Guillaume Lemaitre
committed
change smote initialisation
1 parent 399f4a7 commit fbeb485

File tree

1 file changed

+9
-19
lines changed
  • unbalanced_dataset/over_sampling

1 file changed

+9
-19
lines changed

unbalanced_dataset/over_sampling/smote.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,11 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
149149
else:
150150
raise ValueError('Unknown kind for SMOTE algorithm.')
151151

152-
# --- Verbose
153-
# Control whether or not status and progress information should be
154-
self.verbose = verbose
155-
156-
# --- Nearest Neighbours for synthetic samples
157-
# The smote algorithm uses the k-th nearest neighbours of a minority
158-
# sample to generate new synthetic samples.
159152
self.k = k
153+
self.m = m
154+
self.out_step = out_step
155+
self.verbose = verbose
156+
self.kwargs = kwargs
160157

161158
# --- NN object
162159
# Import the NN object from scikit-learn library. Since in the smote
@@ -166,22 +163,18 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
166163
# Regular smote does not look for samples in danger, instead it
167164
# creates synthetic samples directly from the k-th nearest
168165
# neighbours with no filtering
169-
self.nearest_neighbour = NearestNeighbors(n_neighbors=k + 1,
170-
n_jobs=self.n_jobs)
166+
self.nearest_neighbour = NearestNeighbors(n_neighbors=self.k + 1,
167+
n_jobs=self.n_jobs)
171168
else:
172169
# Borderline1, 2 and SVM variations of smote must first look for
173170
# samples that could be considered noise and samples that live
174171
# near the boundary between the classes. Therefore, before
175172
# creating synthetic samples from the k-th nns, it first looks
176173
# for m nearest neighbors to decide whether or not a sample is
177174
# noise or near the boundary.
178-
self.nearest_neighbour = NearestNeighbors(n_neighbors=m + 1,
179-
n_jobs=self.n_jobs)
175+
self.nearest_neighbour = NearestNeighbors(n_neighbors=self.m + 1,
176+
n_jobs=self.n_jobs)
180177

181-
# --- Nearest Neighbours for noise and boundary (in danger)
182-
# Before creating synthetic samples we must first decide if
183-
# a given entry is noise or in danger. We use m nns in this step
184-
self.m = m
185178

186179
# --- SVM smote
187180
# Unlike the borderline variations, the SVM variation uses the support
@@ -191,11 +184,8 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
191184
# in danger (near the boundary). The level of extrapolation is
192185
# controlled by the out_step.
193186
if kind == 'svm':
194-
# Store extrapolation size
195-
self.out_step = out_step
196-
197187
# Store SVM object with any parameters
198-
self.svm = SVC(random_state=self.random_state, **kwargs)
188+
self.svm = SVC(random_state=self.random_state, **self.kwargs)
199189

200190
def fit(self, X, y):
201191
"""Find the classes statistics before to perform sampling.

0 commit comments

Comments
 (0)