Skip to content

Commit c945ffa

Browse files
committed
fix label_smooth and mixup score
1 parent 20200e1 commit c945ffa

File tree

2 files changed

+55
-60
lines changed

2 files changed

+55
-60
lines changed

paddle/fluid/operators/yolov3_loss_op.h

Lines changed: 45 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -156,57 +156,39 @@ static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input,
156156

157157
template <typename T>
158158
static inline void CalcLabelLoss(T* loss, const T* input, const int index,
159-
const int label, const T score,
160-
const int class_num, const int stride,
161-
const bool use_label_smooth) {
162-
if (use_label_smooth) {
163-
for (int i = 0; i < class_num; i++) {
164-
T pred = input[index + i * stride] < -0.5 ? input[index + i * stride]
165-
: 1.0 / class_num;
166-
loss[0] += SCE<T>(pred, (i == label) ? score : 0.0);
167-
}
168-
} else {
169-
for (int i = 0; i < class_num; i++) {
170-
T pred = input[index + i * stride];
171-
loss[0] += SCE<T>(pred, (i == label) ? score : 0.0);
172-
}
159+
const int label, const int class_num,
160+
const int stride, const T pos, const T neg) {
161+
for (int i = 0; i < class_num; i++) {
162+
T pred = input[index + i * stride];
163+
loss[0] += SCE<T>(pred, (i == label) ? pos : neg);
173164
}
174165
}
175166

176167
template <typename T>
177168
static inline void CalcLabelLossGrad(T* input_grad, const T loss,
178169
const T* input, const int index,
179-
const int label, const T score,
180-
const int class_num, const int stride,
181-
const bool use_label_smooth) {
182-
if (use_label_smooth) {
183-
for (int i = 0; i < class_num; i++) {
184-
T pred = input[index + i * stride] < -0.5 ? input[index + i * stride]
185-
: 1.0 / class_num;
186-
input_grad[index + i * stride] =
187-
SCEGrad<T>(pred, (i == label) ? score : 0.0) * loss;
188-
}
189-
} else {
190-
for (int i = 0; i < class_num; i++) {
191-
T pred = input[index + i * stride];
192-
input_grad[index + i * stride] =
193-
SCEGrad<T>(pred, (i == label) ? score : 0.0) * loss;
194-
}
170+
const int label, const int class_num,
171+
const int stride, const T pos,
172+
const T neg) {
173+
for (int i = 0; i < class_num; i++) {
174+
T pred = input[index + i * stride];
175+
input_grad[index + i * stride] =
176+
SCEGrad<T>(pred, (i == label) ? pos : neg) * loss;
195177
}
196178
}
197179

198180
template <typename T>
199-
static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
181+
static inline void CalcObjnessLoss(T* loss, const T* input, const T* objness,
200182
const int n, const int an_num, const int h,
201183
const int w, const int stride,
202184
const int an_stride) {
203185
for (int i = 0; i < n; i++) {
204186
for (int j = 0; j < an_num; j++) {
205187
for (int k = 0; k < h; k++) {
206188
for (int l = 0; l < w; l++) {
207-
int obj = objness[k * w + l];
208-
if (obj >= 0) {
209-
loss[i] += SCE<T>(input[k * w + l], static_cast<T>(obj));
189+
T obj = objness[k * w + l];
190+
if (obj > -0.5) {
191+
loss[i] += SCE<T>(input[k * w + l], obj);
210192
}
211193
}
212194
}
@@ -218,18 +200,17 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
218200

219201
template <typename T>
220202
static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
221-
const T* input, const int* objness,
203+
const T* input, const T* objness,
222204
const int n, const int an_num,
223205
const int h, const int w,
224206
const int stride, const int an_stride) {
225207
for (int i = 0; i < n; i++) {
226208
for (int j = 0; j < an_num; j++) {
227209
for (int k = 0; k < h; k++) {
228210
for (int l = 0; l < w; l++) {
229-
int obj = objness[k * w + l];
230-
if (obj >= 0) {
231-
input_grad[k * w + l] =
232-
SCEGrad<T>(input[k * w + l], static_cast<T>(obj)) * loss[i];
211+
T obj = objness[k * w + l];
212+
if (obj > -0.5) {
213+
input_grad[k * w + l] = SCEGrad<T>(input[k * w + l], obj) * loss[i];
233214
}
234215
}
235216
}
@@ -285,15 +266,22 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
285266
const int stride = h * w;
286267
const int an_stride = (class_num + 5) * stride;
287268

269+
T label_pos = 1.0;
270+
T label_neg = 0.0;
271+
if (use_label_smooth) {
272+
label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
273+
label_neg = 1.0 / static_cast<T>(class_num);
274+
}
275+
288276
const T* input_data = input->data<T>();
289277
const T* gt_box_data = gt_box->data<T>();
290278
const int* gt_label_data = gt_label->data<int>();
291279
const T* gt_score_data = gt_score->data<T>();
292280
T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
293281
memset(loss_data, 0, loss->numel() * sizeof(T));
294-
int* obj_mask_data =
295-
objness_mask->mutable_data<int>({n, mask_num, h, w}, ctx.GetPlace());
296-
memset(obj_mask_data, 0, objness_mask->numel() * sizeof(int));
282+
T* obj_mask_data =
283+
objness_mask->mutable_data<T>({n, mask_num, h, w}, ctx.GetPlace());
284+
memset(obj_mask_data, 0, objness_mask->numel() * sizeof(T));
297285
int* gt_match_mask_data =
298286
gt_match_mask->mutable_data<int>({n, b}, ctx.GetPlace());
299287

@@ -327,7 +315,7 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
327315

328316
if (best_iou > ignore_thresh) {
329317
int obj_idx = (i * mask_num + j) * stride + k * w + l;
330-
obj_mask_data[obj_idx] = -1;
318+
obj_mask_data[obj_idx] = static_cast<T>(-1.0);
331319
}
332320
// TODO(dengkaipeng): all losses should be calculated if best IoU
333321
is bigger than truth thresh should be calculated here, but
@@ -374,15 +362,15 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
374362
CalcBoxLocationLoss<T>(loss_data + i, input_data, gt, anchors, best_n,
375363
box_idx, gi, gj, h, input_size, stride);
376364

365+
T score = gt_score_data[i * b + t];
377366
int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
378-
obj_mask_data[obj_idx] = 1;
367+
obj_mask_data[obj_idx] = score;
379368

380369
int label = gt_label_data[i * b + t];
381-
T score = gt_score_data[i * b + t];
382370
int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
383371
an_stride, stride, 5);
384-
CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label, score,
385-
class_num, stride, use_label_smooth);
372+
CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label,
373+
class_num, stride, label_pos, label_neg);
386374
}
387375
}
388376
}
@@ -399,7 +387,6 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
399387
auto* input = ctx.Input<Tensor>("X");
400388
auto* gt_box = ctx.Input<Tensor>("GTBox");
401389
auto* gt_label = ctx.Input<Tensor>("GTLabel");
402-
auto* gt_score = ctx.Input<Tensor>("GTScore");
403390
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
404391
auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
405392
auto* objness_mask = ctx.Input<Tensor>("ObjectnessMask");
@@ -421,12 +408,18 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
421408
const int stride = h * w;
422409
const int an_stride = (class_num + 5) * stride;
423410

411+
T label_pos = 1.0;
412+
T label_neg = 0.0;
413+
if (use_label_smooth) {
414+
label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
415+
label_neg = 1.0 / static_cast<T>(class_num);
416+
}
417+
424418
const T* input_data = input->data<T>();
425419
const T* gt_box_data = gt_box->data<T>();
426420
const int* gt_label_data = gt_label->data<int>();
427-
const T* gt_score_data = gt_score->data<T>();
428421
const T* loss_grad_data = loss_grad->data<T>();
429-
const int* obj_mask_data = objness_mask->data<int>();
422+
const T* obj_mask_data = objness_mask->data<T>();
430423
const int* gt_match_mask_data = gt_match_mask->data<int>();
431424
T* input_grad_data =
432425
input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
@@ -447,12 +440,11 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
447440
anchor_mask[mask_idx], box_idx, gi, gj, h, input_size, stride);
448441

449442
int label = gt_label_data[i * b + t];
450-
T score = gt_score_data[i * b + t];
451443
int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
452444
an_stride, stride, 5);
453445
CalcLabelLossGrad<T>(input_grad_data, loss_grad_data[i], input_data,
454-
label_idx, label, score, class_num, stride,
455-
use_label_smooth);
446+
label_idx, label, class_num, stride, label_pos,
447+
label_neg);
456448
}
457449
}
458450
}

python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
8181
x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
8282
loss = np.zeros((n)).astype('float32')
8383

84+
label_pos = 1.0 - 1.0 / class_num if use_label_smooth else 1.0
85+
label_neg = 1.0 / class_num if use_label_smooth else 0.0
86+
8487
pred_box = x[:, :, :, :, :4].copy()
8588
grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
8689
grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
@@ -103,7 +106,7 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
103106

104107
pred_box = pred_box.reshape((n, -1, 4))
105108
pred_obj = x[:, :, :, :, 4].reshape((n, -1))
106-
objness = np.zeros(pred_box.shape[:2])
109+
objness = np.zeros(pred_box.shape[:2]).astype('float32')
107110
ious = batch_xywh_box_iou(pred_box, gtbox)
108111
ious_max = np.max(ious, axis=-1)
109112
objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness),
@@ -145,17 +148,17 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
145148
loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
146149
loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale
147150

148-
objness[i, an_idx * h * w + gj * w + gi] = 1
151+
objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j]
149152

150153
for label_idx in range(class_num):
151-
loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx],
152-
int(label_idx == gtlabel[i, j]) * gtscore[i, j])
154+
loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos
155+
if label_idx == gtlabel[i, j] else label_neg)
153156

154157
for j in range(mask_num * h * w):
155158
if objness[i, j] >= 0:
156159
loss[i] += sce(pred_obj[i, j], objness[i, j])
157160

158-
return (loss, objness.reshape((n, mask_num, h, w)).astype('int32'), \
161+
return (loss, objness.reshape((n, mask_num, h, w)).astype('float32'), \
159162
gt_matches.astype('int32'))
160163

161164

@@ -220,9 +223,9 @@ def initTestCase(self):
220223
self.use_label_smooth = True
221224

222225

223-
class TestYolov3LossWithLabelSmooth(TestYolov3LossOp):
226+
class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp):
224227
def set_label_smooth(self):
225-
self.use_label_smooth = True
228+
self.use_label_smooth = False
226229

227230

228231
if __name__ == "__main__":

0 commit comments

Comments
 (0)