Skip to content

Commit 53563d6

Browse files
authored
Merge pull request #359 from google/iou
Add IoU loss for box regression.
2 parents df587df + d9c2668 commit 53563d6

File tree

4 files changed

+285
-4
lines changed

4 files changed

+285
-4
lines changed

efficientdet/det_model_fn.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import coco_metric
2929
import efficientdet_arch
3030
import hparams_config
31+
import iou_utils
3132
import retinanet_arch
3233
import utils
3334

@@ -230,6 +231,14 @@ def _box_loss(box_outputs, box_targets, num_positives, delta=0.1):
230231
return box_loss
231232

232233

234+
def _box_iou_loss(box_outputs, box_targets, num_positives, iou_loss_type):
  """Returns the normalized IoU-based box regression loss.

  Args:
    box_outputs: predicted boxes, forwarded unchanged to `iou_utils.iou_loss`.
    box_targets: target boxes, forwarded unchanged to `iou_utils.iou_loss`.
    num_positives: normalization count; the summed loss is divided by
      `num_positives * 4.0`.
    iou_loss_type: IoU variant name, forwarded to `iou_utils.iou_loss`.

  Returns:
    The sum of all per-element IoU losses divided by `num_positives * 4.0`.
  """
  per_box_loss = iou_utils.iou_loss(box_outputs, box_targets, iou_loss_type)
  return tf.reduce_sum(per_box_loss) / (num_positives * 4.0)
240+
241+
233242
def detection_loss(cls_outputs, box_outputs, labels, params):
234243
"""Computes total detection loss.
235244
@@ -249,6 +258,7 @@ def detection_loss(cls_outputs, box_outputs, labels, params):
249258
class and box losses from all levels.
250259
cls_loss: an integer tensor representing total class loss.
251260
box_loss: an integer tensor representing total box regression loss.
261+
box_iou_loss: an integer tensor representing total box iou loss.
252262
"""
253263
# Sum all positives in a batch for normalization and avoid zero
254264
# num_positives_sum, which would lead to inf loss during training
@@ -257,6 +267,7 @@ class and box losses from all levels.
257267

258268
cls_losses = []
259269
box_losses = []
270+
box_iou_losses = []
260271
for level in levels:
261272
if params['data_format'] == 'channels_first':
262273
labels['cls_targets_%d' % level] = tf.transpose(
@@ -297,12 +308,19 @@ class and box losses from all levels.
297308
box_targets_at_level,
298309
num_positives_sum,
299310
delta=params['delta']))
311+
if params['iou_loss_type']:
312+
box_iou_losses.append(
313+
_box_iou_loss(box_outputs[level], box_targets_at_level,
314+
num_positives_sum, params['iou_loss_type']))
300315

301316
# Sum per level losses to total loss.
302317
cls_loss = tf.add_n(cls_losses)
303318
box_loss = tf.add_n(box_losses)
304-
total_loss = cls_loss + params['box_loss_weight'] * box_loss
305-
return total_loss, cls_loss, box_loss
319+
box_iou_loss = tf.add_n(box_iou_losses) if box_iou_losses else 0.0
320+
total_loss = (
321+
cls_loss + params['box_loss_weight'] * box_loss +
322+
params['iou_loss_weight'] * box_iou_loss)
323+
return total_loss, cls_loss, box_loss, box_iou_loss
306324

307325

308326
def add_metric_fn_inputs(params,
@@ -463,15 +481,16 @@ def _model_outputs(inputs):
463481
learning_rate = learning_rate_schedule(params, global_step)
464482

465483
# cls_loss and box_loss are for logging. only total_loss is optimized.
466-
det_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
467-
labels, params)
484+
det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
485+
cls_outputs, box_outputs, labels, params)
468486
l2loss = reg_l2_loss(params['weight_decay'])
469487
total_loss = det_loss + l2loss
470488

471489
if mode == tf.estimator.ModeKeys.TRAIN:
472490
utils.scalar('lrn_rate', learning_rate)
473491
utils.scalar('trainloss/cls_loss', cls_loss)
474492
utils.scalar('trainloss/box_loss', box_loss)
493+
utils.scalar('trainloss/box_iou_loss', box_iou_loss)
475494
utils.scalar('trainloss/det_loss', det_loss)
476495
utils.scalar('trainloss/l2_loss', l2loss)
477496
utils.scalar('trainloss/loss', total_loss)

efficientdet/hparams_config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,8 @@ def default_detection_configs():
209209
# localization loss
210210
h.delta = 0.1
211211
h.box_loss_weight = 50.0
212+
h.iou_loss_type = None
213+
h.iou_loss_weight = 1.0
212214
# regularization l2 loss.
213215
h.weight_decay = 4e-5
214216
# enable bfloat

efficientdet/iou_utils.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
# Copyright 2020 Google Research. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
"""IoU utils for box regression with iou losses.
16+
17+
Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression.
18+
https://arxiv.org/pdf/1911.08287.pdf
19+
"""
20+
21+
from __future__ import absolute_import
22+
from __future__ import division
23+
# gtype import
24+
from __future__ import print_function
25+
26+
import math
27+
import numpy as np
28+
import tensorflow.compat.v1 as tf
29+
from typing import Union, Text
30+
31+
# Type alias used in this file's annotations for a floating-point value: either
# a TensorFlow tensor or a plain Python / NumPy float scalar.
FloatType = Union[tf.Tensor, float, np.float32, np.float64]
32+
33+
34+
def _get_v(b1_height: FloatType, b1_width: FloatType, b2_height: FloatType,
           b2_width: FloatType) -> tf.Tensor:
  """Get the consistency measurement of aspect ratio for ciou.

  Computes, element-wise,

    v = 4 / pi^2 * (atan(b1_width / b1_height) - atan(b2_width / b2_height))^2

  Both ratios use `tf.math.divide_no_nan`, so a zero height yields a ratio of
  0 rather than inf/nan.

  The backward pass is overridden with `tf.custom_gradient`. Explicit gradients
  are returned only for `b2_height` and `b2_width` (the arguments of the inner
  function); `b1_height` and `b1_width` are captured by closure.

  Args:
    b1_height: height of the first box.
    b1_width: width of the first box.
    b2_height: height of the second box; receives the custom gradient.
    b2_width: width of the second box; receives the custom gradient.

  Returns:
    A tensor holding the aspect-ratio consistency term `v`.
  """

  @tf.custom_gradient
  def _get_grad_v(height, width):
    """Backpropagate gradient."""
    # Signed difference between the aspect-ratio angles of box 1 and box 2.
    arctan = tf.atan(tf.math.divide_no_nan(b1_width, b1_height)) - tf.atan(
        tf.math.divide_no_nan(width, height))
    v = 4 * ((arctan / math.pi)**2)

    def _grad_v(dv, variables=None):
      # NOTE(review): these are hand-written gradients, not the exact analytic
      # derivative of `v` (there is no 1 / (height^2 + width^2) factor) --
      # presumably the simplified form from the CIoU paper; confirm sign and
      # scale against the reference implementation.
      gdw = dv * 8 * arctan * height / (math.pi**2)
      gdh = -dv * 8 * arctan * width / (math.pi**2)
      # Input gradients are listed in argument order (height, width); the
      # second element is the gradient w.r.t. any captured variables.
      return [gdh, gdw], tf.gradients(v, variables, grad_ys=dv)

    return v, _grad_v

  return _get_grad_v(b2_height, b2_width)
53+
54+
55+
def _iou_per_anchor(pred_boxes: FloatType,
                    target_boxes: FloatType,
                    iou_type: Text = 'iou') -> tf.Tensor:
  """Computes the IoU, or one of its variants, for a single anchor.

  Every operation is element-wise over the coordinate tensors, so each box
  pair is scored independently of all other boxes in the batch.

  Args:
    pred_boxes: predicted boxes as a sequence of four coordinate tensors
      [y_min, x_min, y_max, x_max].
    target_boxes: target boxes as a sequence of four coordinate tensors
      [y_min, x_min, y_max, x_max].
    iou_type: one of ['iou', 'ciou', 'diou', 'giou'].

  Returns:
    A float `Tensor` holding the selected IoU value (not the loss) per box.

  Raises:
    ValueError: if `iou_type` is not one of the supported names.
  """
  if iou_type not in ('iou', 'giou', 'diou', 'ciou'):
    raise ValueError(
        'Unknown loss_type {}, not iou/ciou/diou/giou'.format(iou_type))

  # t_ denotes target boxes and p_ denotes predicted boxes.
  t_ymin, t_xmin, t_ymax, t_xmax = target_boxes
  p_ymin, p_xmin, p_ymax, p_xmax = pred_boxes

  # Clamp sizes at zero so inverted boxes contribute no area.
  zero = tf.convert_to_tensor(0.0, t_ymin.dtype)
  p_width = tf.maximum(zero, p_xmax - p_xmin)
  p_height = tf.maximum(zero, p_ymax - p_ymin)
  t_width = tf.maximum(zero, t_xmax - t_xmin)
  t_height = tf.maximum(zero, t_ymax - t_ymin)
  p_area = p_width * p_height
  t_area = t_width * t_height

  intersect_ymin = tf.maximum(p_ymin, t_ymin)
  intersect_xmin = tf.maximum(p_xmin, t_xmin)
  intersect_ymax = tf.minimum(p_ymax, t_ymax)
  intersect_xmax = tf.minimum(p_xmax, t_xmax)
  intersect_width = tf.maximum(zero, intersect_xmax - intersect_xmin)
  intersect_height = tf.maximum(zero, intersect_ymax - intersect_ymin)
  intersect_area = intersect_width * intersect_height

  union_area = p_area + t_area - intersect_area
  # divide_no_nan returns 0 when both boxes are empty (union_area == 0).
  iou_v = tf.math.divide_no_nan(intersect_area, union_area)
  if iou_type == 'iou':
    return iou_v  # iou is the simplest form.

  # Smallest box enclosing both the predicted and the target box.
  enclose_ymin = tf.minimum(p_ymin, t_ymin)
  enclose_xmin = tf.minimum(p_xmin, t_xmin)
  enclose_ymax = tf.maximum(p_ymax, t_ymax)
  enclose_xmax = tf.maximum(p_xmax, t_xmax)

  if iou_type == 'giou':  # giou is the generalized iou.
    enclose_width = tf.maximum(zero, enclose_xmax - enclose_xmin)
    enclose_height = tf.maximum(zero, enclose_ymax - enclose_ymin)
    enclose_area = enclose_width * enclose_height
    return iou_v - tf.math.divide_no_nan(
        (enclose_area - union_area), enclose_area)

  # Squared centre distance and squared enclosing-box diagonal, per box.
  # These are written as explicit sums of squares instead of
  # `tf.linalg.norm(...)**2`: a norm without `axis` reduces over every box in
  # the batch and yields one shared scalar, and the norm's gradient is
  # undefined where the distance is exactly zero.
  center_dy = (t_ymin + t_ymax) / 2 - (p_ymin + p_ymax) / 2
  center_dx = (t_xmin + t_xmax) / 2 - (p_xmin + p_xmax) / 2
  center_dist_sq = tf.square(center_dy) + tf.square(center_dx)
  diag_sq = (
      tf.square(enclose_ymax - enclose_ymin) +
      tf.square(enclose_xmax - enclose_xmin))
  diou_v = iou_v - tf.math.divide_no_nan(center_dist_sq, diag_sq)
  if iou_type == 'diou':  # diou is the distance iou.
    return diou_v

  # NOTE(review): `_get_v` attaches its custom gradient to its last two
  # arguments, which here are the *target* height/width -- confirm whether the
  # predicted dimensions were meant to be passed in that position instead.
  v = _get_v(p_height, p_width, t_height, t_width)
  alpha = tf.math.divide_no_nan(v, ((1 - iou_v) + v))
  return diou_v - alpha * v  # the last one is ciou.
121+
122+
123+
def iou_loss(pred_boxes: FloatType,
             target_boxes: FloatType,
             iou_type: Text = 'iou') -> tf.Tensor:
  """A unified interface for computing various IoU losses.

  Let B denote the predicted box and B_gt the target (ground truth) box:

    IoU = |B & B_gt| / |B | B_gt|

    GIoU = IoU - |C - (B | B_gt)| / |C|, where C is the smallest box covering
    B and B_gt.

    DIoU = IoU - E(B, B_gt)^2 / c^2, where E is the Euclidean distance between
    the center points of B and B_gt, and c is the diagonal length of the
    smallest box covering the two boxes.

    CIoU = DIoU - a * v, where a is a positive trade-off parameter and v
    measures the consistency of aspect ratio:
      v = (arctan(w_gt / h_gt) - arctan(w / h))^2 * 4 / pi^2
    with (w_gt, h_gt) and (w, h) the width and height of the target and
    predicted box respectively.

  The returned loss is computed as 1 - one of {IoU, GIoU, DIoU, CIoU}. Anchors
  whose target box has zero area are masked out and contribute a loss of 0.

  Args:
    pred_boxes: predicted boxes, with coordinate [y_min, x_min, y_max, x_max]*.
      The last dimension may hold several anchors, four coordinates each.
    target_boxes: target boxes, with coordinate [y_min, x_min, y_max, x_max]*.
      The last dimension may hold several anchors, four coordinates each.
    iou_type: one of ['iou', 'ciou', 'diou', 'giou'].

  Returns:
    IoU loss float `Tensor`; losses of multiple anchors are summed.

  Raises:
    ValueError: if `iou_type` is not a supported name.
  """
  if iou_type not in ('iou', 'ciou', 'diou', 'giou'):
    raise ValueError(
        'Unknown loss_type {}, not iou/ciou/diou/giou'.format(iou_type))

  pred_boxes = tf.convert_to_tensor(pred_boxes, tf.float32)
  target_boxes = tf.cast(target_boxes, pred_boxes.dtype)

  # Split the last dimension into one tensor per coordinate.
  pred_coords = tf.unstack(pred_boxes, axis=-1)
  target_coords = tf.unstack(target_boxes, axis=-1)
  assert len(pred_coords) == len(target_coords)
  assert len(pred_coords) % 4 == 0

  per_anchor_losses = []
  for start in range(0, len(pred_coords), 4):
    anchor_target = target_coords[start:start + 4]

    # Anchors with an empty target box are excluded from the loss.
    t_ymin, t_xmin, t_ymax, t_xmax = anchor_target
    target_area = (t_ymax - t_ymin) * (t_xmax - t_xmin)
    valid = tf.cast(tf.not_equal(target_area, 0), t_ymin.dtype)

    # Loss is valid * (1 - iou) = valid - masked_iou, so the predictions are
    # zeroed wherever the target is empty.
    anchor_pred = [coord * valid for coord in pred_coords[start:start + 4]]
    iou_value = _iou_per_anchor(anchor_pred, anchor_target, iou_type)
    per_anchor_losses.append(valid - tf.squeeze(iou_value))

  if len(per_anchor_losses) == 1:
    return per_anchor_losses[0]
  return tf.reduce_sum(tf.stack(per_anchor_losses), 0)
186+

efficientdet/iou_utils_test.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Copyright 2020 Google Research. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ======================================
15+
"""Tests for iou_utils."""
16+
17+
from __future__ import absolute_import
18+
from __future__ import division
19+
from __future__ import print_function
20+
21+
import tensorflow.compat.v1 as tf
22+
import iou_utils
23+
24+
25+
class IouUtilsTest(tf.test.TestCase):
  """IoU test class.

  Expected losses are derived by hand, independently for each box pair:

    pair 0: pred [4, 3, 7, 5] vs target [3, 4, 6, 8]
      IoU = 2 / 16, centre distance^2 = 5, enclosing diagonal^2 = 41.
    pair 1: pred [5, 6, 10, 7] vs target [14, 14, 15, 15]
      IoU = 0, centre distance^2 = 113, enclosing diagonal^2 = 181.
  """

  def setUp(self):
    super(IouUtilsTest, self).setUp()
    self.pb = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                          dtype=tf.float32)
    self.tb = tf.constant(
        [[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]], dtype=tf.float32)
    self.zeros = tf.constant([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=tf.float32)

  def test_iou(self):
    self.assertAllClose(
        iou_utils.iou_loss(self.pb, self.tb, 'iou'), [0.875, 1.])

  def test_ciou(self):
    # DIoU loss plus alpha * v, i.e. v^2 / (1 - IoU + v) with
    # v = 4 / pi^2 * (atan(w_gt / h_gt) - atan(w / h))^2:
    #   0.996951 + 0.002362 and 1.624309 + 0.017222.
    self.assertAllClose(
        iou_utils.iou_loss(self.pb, self.tb, 'ciou'), [0.999313, 1.641531])

  def test_diou(self):
    # 1 - IoU + d^2 / c^2 per box: 0.875 + 5 / 41 and 1.0 + 113 / 181.
    self.assertAllClose(
        iou_utils.iou_loss(self.pb, self.tb, 'diou'), [0.996951, 1.624309])

  def test_giou(self):
    self.assertAllClose(
        iou_utils.iou_loss(self.pb, self.tb, 'giou'), [1.075000, 1.933333])

  def test_distance_terms_are_per_box(self):
    # Scoring a batch must give the same result as scoring each box alone.
    for iou_type in ('diou', 'ciou'):
      batched = iou_utils.iou_loss(self.pb, self.tb, iou_type)
      first = iou_utils.iou_loss(self.pb[:1], self.tb[:1], iou_type)
      second = iou_utils.iou_loss(self.pb[1:], self.tb[1:], iou_type)
      self.assertAllClose(batched, tf.concat([first, second], axis=0))

  def test_iou_zero_target(self):
    # Empty targets are masked out, so every variant must report zero loss.
    for iou_type in ('iou', 'ciou', 'diou', 'giou'):
      self.assertAllClose(
          iou_utils.iou_loss(self.pb, self.zeros, iou_type), [0.0, 0.0])

  def test_iou_multiple_anchors(self):
    pb = tf.tile(self.pb, [1, 2])
    tb = tf.tile(self.tb, [1, 2])
    self.assertAllClose(iou_utils.iou_loss(pb, tb, 'iou'), [1.75, 2.0])

  def test_iou_multiple_anchors_mixed(self):
    pb = tf.concat([self.pb, self.zeros], axis=-1)
    tb = tf.concat([self.tb, self.zeros], axis=-1)
    self.assertAllClose(iou_utils.iou_loss(pb, tb, 'iou'), [0.875, 1.0])
71+
72+
73+
# Run the test cases in this file when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()

0 commit comments

Comments
 (0)