@@ -68,7 +68,7 @@ def _validate_estimator(self):
68
68
)
69
69
70
70
def _make_samples (
71
- self , X , y_dtype , y_type , nn_data , nn_num , n_samples , step_size = 1.0
71
+ self , X , y_dtype , y_type , nn_data , nn_num , n_samples , step_size = 1.0 , y = None
72
72
):
73
73
"""A support function that returns artificial samples constructed along
74
74
the line connecting nearest neighbours.
@@ -98,6 +98,10 @@ def _make_samples(
98
98
step_size : float, default=1.0
99
99
The step size to create samples.
100
100
101
+ y : ndarray of shape (n_samples_all,), default=None
102
+ The true target associated with `nn_data`. Used by Borderline SMOTE-2 to
103
+ weight the distances in the sample generation process.
104
+
101
105
Returns
102
106
-------
103
107
X_new : {ndarray, sparse matrix} of shape (n_samples_new, n_features)
@@ -114,11 +118,13 @@ def _make_samples(
114
118
rows = np .floor_divide (samples_indices , nn_num .shape [1 ])
115
119
cols = np .mod (samples_indices , nn_num .shape [1 ])
116
120
117
- X_new = self ._generate_samples (X , nn_data , nn_num , rows , cols , steps , y_type )
121
+ X_new = self ._generate_samples (X , nn_data , nn_num , rows , cols , steps , y_type , y )
118
122
y_new = np .full (n_samples , fill_value = y_type , dtype = y_dtype )
119
123
return X_new , y_new
120
124
121
- def _generate_samples (self , X , nn_data , nn_num , rows , cols , steps , y_type = None ):
125
+ def _generate_samples (
126
+ self , X , nn_data , nn_num , rows , cols , steps , y_type = None , y = None
127
+ ):
122
128
r"""Generate a synthetic sample.
123
129
124
130
The rule for the generation is:
@@ -153,15 +159,26 @@ def _generate_samples(self, X, nn_data, nn_num, rows, cols, steps, y_type=None):
153
159
steps : ndarray of shape (n_samples,), dtype=float
154
160
Step sizes for new samples.
155
161
156
- y_type : None
157
- Unused parameter. Only for compatibility reason with SMOTE-NC.
162
+ y_type : str, int or None, default=None
163
+ Class label of the current target class for which we want to generate
164
+ samples.
165
+
166
+ y : ndarray of shape (n_samples_all,), default=None
167
+ The true target associated with `nn_data`. Used by Borderline SMOTE-2 to
168
+ weight the distances in the sample generation process.
158
169
159
170
Returns
160
171
-------
161
172
X_new : {ndarray, sparse matrix} of shape (n_samples, n_features)
162
173
Synthetically generated samples.
163
174
"""
164
175
diffs = nn_data [nn_num [rows , cols ]] - X [rows ]
176
+ if y is not None : # only entering for BorderlineSMOTE-2
177
+ random_state = check_random_state (self .random_state )
178
+ mask_pair_samples = y [nn_num [rows , cols ]] != y_type
179
+ diffs [mask_pair_samples ] *= random_state .uniform (
180
+ low = 0.0 , high = 0.5 , size = (mask_pair_samples .sum (), 1 )
181
+ )
165
182
166
183
if sparse .issparse (X ):
167
184
sparse_func = type (X ).__name__
@@ -736,7 +753,7 @@ def _fit_resample(self, X, y):
736
753
737
754
return X_resampled , y_resampled
738
755
739
- def _generate_samples (self , X , nn_data , nn_num , rows , cols , steps , y_type ):
756
+ def _generate_samples (self , X , nn_data , nn_num , rows , cols , steps , y_type , y = None ):
740
757
"""Generate a synthetic sample with additional steps for the
741
758
categorical features.
742
759
0 commit comments