@@ -121,13 +121,13 @@ template <typename T>
121
121
static void CalcBoxLocationLoss (T* loss, const T* input, Box<T> gt,
122
122
std::vector<int > anchors, int an_idx,
123
123
int box_idx, int gi, int gj, int grid_size,
124
- int input_size, int stride) {
124
+ int input_size, int stride, T score ) {
125
125
T tx = gt.x * grid_size - gi;
126
126
T ty = gt.y * grid_size - gj;
127
127
T tw = std::log (gt.w * input_size / anchors[2 * an_idx]);
128
128
T th = std::log (gt.h * input_size / anchors[2 * an_idx + 1 ]);
129
129
130
- T scale = 2.0 - gt.w * gt.h ;
130
+ T scale = ( 2.0 - gt.w * gt.h ) * score ;
131
131
loss[0 ] += SCE<T>(input[box_idx], tx) * scale;
132
132
loss[0 ] += SCE<T>(input[box_idx + stride], ty) * scale;
133
133
loss[0 ] += L1Loss<T>(input[box_idx + 2 * stride], tw) * scale;
@@ -138,13 +138,14 @@ template <typename T>
138
138
static void CalcBoxLocationLossGrad (T* input_grad, const T loss, const T* input,
139
139
Box<T> gt, std::vector<int > anchors,
140
140
int an_idx, int box_idx, int gi, int gj,
141
- int grid_size, int input_size, int stride) {
141
+ int grid_size, int input_size, int stride,
142
+ T score) {
142
143
T tx = gt.x * grid_size - gi;
143
144
T ty = gt.y * grid_size - gj;
144
145
T tw = std::log (gt.w * input_size / anchors[2 * an_idx]);
145
146
T th = std::log (gt.h * input_size / anchors[2 * an_idx + 1 ]);
146
147
147
- T scale = 2.0 - gt.w * gt.h ;
148
+ T scale = ( 2.0 - gt.w * gt.h ) * score ;
148
149
input_grad[box_idx] = SCEGrad<T>(input[box_idx], tx) * scale * loss;
149
150
input_grad[box_idx + stride] =
150
151
SCEGrad<T>(input[box_idx + stride], ty) * scale * loss;
@@ -157,23 +158,24 @@ static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input,
157
158
// Adds the classification loss of one matched box to loss[0].
//
// For every class i in [0, class_num) the prediction is read from
// input[index + i * stride] and compared against a target that is `pos` when
// i == label and `neg` otherwise. In the added ('+') lines of this diff each
// per-class term is additionally multiplied by `score`; the removed ('-')
// lines show the previous, unweighted form.
//
// NOTE(review): SCE<T> is defined outside this view -- presumably a sigmoid
// cross-entropy; confirm against its definition.
template <typename T>
158
159
static inline void CalcLabelLoss (T* loss, const T* input, const int index,
159
160
const int label, const int class_num,
160
- const int stride, const T pos, const T neg) {
161
+ const int stride, const T pos, const T neg,
162
+ T score) {
161
163
for (int i = 0 ; i < class_num; i++) {
162
164
// Entries for consecutive classes are `stride` elements apart in `input`.
T pred = input[index + i * stride];
163
- loss[0 ] += SCE<T>(pred, (i == label) ? pos : neg);
165
+ loss[0 ] += SCE<T>(pred, (i == label) ? pos : neg) * score ;
164
166
}
165
167
}
166
168
167
169
// Writes the classification-loss gradient of one matched box into input_grad.
//
// For every class i in [0, class_num) the slot input_grad[index + i * stride]
// is overwritten (assigned, not accumulated) with
// SCEGrad<T>(pred, target) scaled by `loss`, where target is `pos` when
// i == label and `neg` otherwise. In the added ('+') lines of this diff the
// value is additionally multiplied by `score`; the removed ('-') lines show
// the previous, unweighted form.
//
// `loss` is a scalar factor taken by value; the caller visible in this view
// passes loss_grad_data[i] for it.
//
// NOTE(review): SCEGrad<T> is defined outside this view -- presumably the
// derivative of the SCE loss w.r.t. the prediction; confirm.
template <typename T>
168
170
static inline void CalcLabelLossGrad (T* input_grad, const T loss,
169
171
const T* input, const int index,
170
172
const int label, const int class_num,
171
- const int stride, const T pos,
172
- const T neg ) {
173
+ const int stride, const T pos, const T neg,
174
+ T score ) {
173
175
for (int i = 0 ; i < class_num; i++) {
174
176
// Same addressing as the forward pass: one entry per class, `stride` apart.
T pred = input[index + i * stride];
175
177
input_grad[index + i * stride] =
176
- SCEGrad<T>(pred, (i == label) ? pos : neg) * loss;
178
+ SCEGrad<T>(pred, (i == label) ? pos : neg) * score * loss;
177
179
}
178
180
}
179
181
@@ -187,8 +189,12 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const T* objness,
187
189
for (int k = 0 ; k < h; k++) {
188
190
for (int l = 0 ; l < w; l++) {
189
191
T obj = objness[k * w + l];
190
- if (obj > -0.5 ) {
191
- loss[i] += SCE<T>(input[k * w + l], obj);
192
+ if (obj > 1e-5 ) {
193
+ // positive sample: obj = mixup score
194
+ loss[i] += SCE<T>(input[k * w + l], 1.0 ) * obj;
195
+ } else if (obj > -0.5 ) {
196
// negative sample: obj = 0
197
+ loss[i] += SCE<T>(input[k * w + l], 0.0 );
192
198
}
193
199
}
194
200
}
@@ -209,8 +215,11 @@ static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
209
215
for (int k = 0 ; k < h; k++) {
210
216
for (int l = 0 ; l < w; l++) {
211
217
T obj = objness[k * w + l];
212
- if (obj > -0.5 ) {
213
- input_grad[k * w + l] = SCEGrad<T>(input[k * w + l], obj) * loss[i];
218
+ if (obj > 1e-5 ) {
219
+ input_grad[k * w + l] =
220
+ SCEGrad<T>(input[k * w + l], 1.0 ) * obj * loss[i];
221
+ } else if (obj > -0.5 ) {
222
+ input_grad[k * w + l] = SCEGrad<T>(input[k * w + l], 0.0 ) * loss[i];
214
223
}
215
224
}
216
225
}
@@ -315,7 +324,7 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
315
324
316
325
if (best_iou > ignore_thresh) {
317
326
int obj_idx = (i * mask_num + j) * stride + k * w + l;
318
- obj_mask_data[obj_idx] = static_cast <T>(-1.0 );
327
+ obj_mask_data[obj_idx] = static_cast <T>(-1 );
319
328
}
320
329
// TODO(dengkaipeng): all losses should be calculated if best IoU
321
330
// is bigger than truth thresh should be calculated here, but
@@ -357,20 +366,20 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
357
366
int mask_idx = GetMaskIndex (anchor_mask, best_n);
358
367
gt_match_mask_data[i * b + t] = mask_idx;
359
368
if (mask_idx >= 0 ) {
369
+ T score = gt_score_data[i * b + t];
360
370
int box_idx = GetEntryIndex (i, mask_idx, gj * w + gi, mask_num,
361
371
an_stride, stride, 0 );
362
372
CalcBoxLocationLoss<T>(loss_data + i, input_data, gt, anchors, best_n,
363
- box_idx, gi, gj, h, input_size, stride);
373
+ box_idx, gi, gj, h, input_size, stride, score );
364
374
365
- T score = gt_score_data[i * b + t];
366
375
int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
367
376
obj_mask_data[obj_idx] = score;
368
377
369
378
int label = gt_label_data[i * b + t];
370
379
int label_idx = GetEntryIndex (i, mask_idx, gj * w + gi, mask_num,
371
380
an_stride, stride, 5 );
372
381
CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label,
373
- class_num, stride, label_pos, label_neg);
382
+ class_num, stride, label_pos, label_neg, score );
374
383
}
375
384
}
376
385
}
@@ -387,6 +396,7 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
387
396
auto * input = ctx.Input <Tensor>(" X" );
388
397
auto * gt_box = ctx.Input <Tensor>(" GTBox" );
389
398
auto * gt_label = ctx.Input <Tensor>(" GTLabel" );
399
+ auto * gt_score = ctx.Input <Tensor>(" GTScore" );
390
400
auto * input_grad = ctx.Output <Tensor>(framework::GradVarName (" X" ));
391
401
auto * loss_grad = ctx.Input <Tensor>(framework::GradVarName (" Loss" ));
392
402
auto * objness_mask = ctx.Input <Tensor>(" ObjectnessMask" );
@@ -418,6 +428,7 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
418
428
const T* input_data = input->data <T>();
419
429
const T* gt_box_data = gt_box->data <T>();
420
430
const int * gt_label_data = gt_label->data <int >();
431
+ const T* gt_score_data = gt_score->data <T>();
421
432
const T* loss_grad_data = loss_grad->data <T>();
422
433
const T* obj_mask_data = objness_mask->data <T>();
423
434
const int * gt_match_mask_data = gt_match_mask->data <int >();
@@ -429,22 +440,24 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
429
440
for (int t = 0 ; t < b; t++) {
430
441
int mask_idx = gt_match_mask_data[i * b + t];
431
442
if (mask_idx >= 0 ) {
443
+ T score = gt_score_data[i * b + t];
432
444
Box<T> gt = GetGtBox (gt_box_data, i, b, t);
433
445
int gi = static_cast <int >(gt.x * w);
434
446
int gj = static_cast <int >(gt.y * h);
435
447
436
448
int box_idx = GetEntryIndex (i, mask_idx, gj * w + gi, mask_num,
437
449
an_stride, stride, 0 );
438
- CalcBoxLocationLossGrad<T>(
439
- input_grad_data, loss_grad_data[i], input_data, gt, anchors,
440
- anchor_mask[mask_idx], box_idx, gi, gj, h, input_size, stride);
450
+ CalcBoxLocationLossGrad<T>(input_grad_data, loss_grad_data[i],
451
+ input_data, gt, anchors,
452
+ anchor_mask[mask_idx], box_idx, gi, gj, h,
453
+ input_size, stride, score);
441
454
442
455
int label = gt_label_data[i * b + t];
443
456
int label_idx = GetEntryIndex (i, mask_idx, gj * w + gi, mask_num,
444
457
an_stride, stride, 5 );
445
458
CalcLabelLossGrad<T>(input_grad_data, loss_grad_data[i], input_data,
446
459
label_idx, label, class_num, stride, label_pos,
447
- label_neg);
460
+ label_neg, score );
448
461
}
449
462
}
450
463
}
0 commit comments