Merge pull request #359 from google/iou

mingxingtan · web-flow · commit 53563d63454e · 2020-05-06T20:27:00.000-07:00
Add IoU loss for box regression.
diff --git a/efficientdet/det_model_fn.py b/efficientdet/det_model_fn.py
@@ -28,6 +28,7 @@
 import coco_metric
 import efficientdet_arch
 import hparams_config
+import iou_utils
 import retinanet_arch
 import utils
 
@@ -230,6 +231,14 @@ def _box_loss(box_outputs, box_targets, num_positives, delta=0.1):
   return box_loss
 
 
+def _box_iou_loss(box_outputs, box_targets, num_positives, iou_loss_type):
+  """Computes box iou loss."""
+  normalizer = num_positives * 4.0
+  box_iou_loss = iou_utils.iou_loss(box_outputs, box_targets, iou_loss_type)
+  box_iou_loss = tf.reduce_sum(box_iou_loss) / normalizer
+  return box_iou_loss
+
+
 def detection_loss(cls_outputs, box_outputs, labels, params):
   """Computes total detection loss.
 
@@ -249,6 +258,7 @@ def detection_loss(cls_outputs, box_outputs, labels, params):
       class and box losses from all levels.
     cls_loss: an integer tensor representing total class loss.
     box_loss: an integer tensor representing total box regression loss.
+    box_iou_loss: a float tensor representing total box iou loss.
   """
   # Sum all positives in a batch for normalization and avoid zero
   # num_positives_sum, which would lead to inf loss during training
@@ -257,6 +267,7 @@ class and box losses from all levels.
 
   cls_losses = []
   box_losses = []
+  box_iou_losses = []
   for level in levels:
     if params['data_format'] == 'channels_first':
       labels['cls_targets_%d' % level] = tf.transpose(
@@ -297,12 +308,19 @@ class and box losses from all levels.
             box_targets_at_level,
             num_positives_sum,
             delta=params['delta']))
+    if params['iou_loss_type']:
+      box_iou_losses.append(
+          _box_iou_loss(box_outputs[level], box_targets_at_level,
+                        num_positives_sum, params['iou_loss_type']))
 
   # Sum per level losses to total loss.
   cls_loss = tf.add_n(cls_losses)
   box_loss = tf.add_n(box_losses)
-  total_loss = cls_loss + params['box_loss_weight'] * box_loss
-  return total_loss, cls_loss, box_loss
+  box_iou_loss = tf.add_n(box_iou_losses) if box_iou_losses else 0.0
+  total_loss = (
+      cls_loss + params['box_loss_weight'] * box_loss +
+      params['iou_loss_weight'] * box_iou_loss)
+  return total_loss, cls_loss, box_loss, box_iou_loss
 
 
 def add_metric_fn_inputs(params,
@@ -463,15 +481,16 @@ def _model_outputs(inputs):
   learning_rate = learning_rate_schedule(params, global_step)
 
   # cls_loss and box_loss are for logging. only total_loss is optimized.
-  det_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
-                                                labels, params)
+  det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
+      cls_outputs, box_outputs, labels, params)
   l2loss = reg_l2_loss(params['weight_decay'])
   total_loss = det_loss + l2loss
 
   if mode == tf.estimator.ModeKeys.TRAIN:
     utils.scalar('lrn_rate', learning_rate)
     utils.scalar('trainloss/cls_loss', cls_loss)
     utils.scalar('trainloss/box_loss', box_loss)
+    utils.scalar('trainloss/box_iou_loss', box_iou_loss)
     utils.scalar('trainloss/det_loss', det_loss)
     utils.scalar('trainloss/l2_loss', l2loss)
     utils.scalar('trainloss/loss', total_loss)
diff --git a/efficientdet/hparams_config.py b/efficientdet/hparams_config.py
@@ -209,6 +209,8 @@ def default_detection_configs():
   # localization loss
   h.delta = 0.1
   h.box_loss_weight = 50.0
+  h.iou_loss_type = None
+  h.iou_loss_weight = 1.0
   # regularization l2 loss.
   h.weight_decay = 4e-5
   # enable bfloat
diff --git a/efficientdet/iou_utils.py b/efficientdet/iou_utils.py
@@ -0,0 +1,186 @@
+# Copyright 2020 Google Research. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""IoU utils for box regression with iou losses.
+
+Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression.
+https://arxiv.org/pdf/1911.08287.pdf
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+# gtype import
+from __future__ import print_function
+
+import math
+import numpy as np
+import tensorflow.compat.v1 as tf
+from typing import Union, Text
+
+FloatType = Union[tf.Tensor, float, np.float32, np.float64]
+
+
+def _get_v(b1_height: FloatType, b1_width: FloatType, b2_height: FloatType,
+           b2_width: FloatType) -> tf.Tensor:
+  """Get the consistency measurement of aspect ratio for ciou."""
+
+  @tf.custom_gradient
+  def _get_grad_v(height, width):
+    """Computes v and backpropagates a custom gradient."""
+    arctan = tf.atan(tf.math.divide_no_nan(b1_width, b1_height)) - tf.atan(
+        tf.math.divide_no_nan(width, height))
+    v = 4 * ((arctan / math.pi)**2)
+
+    def _grad_v(dv, variables=None):
+      gdw = dv * 8 * arctan * height / (math.pi**2)
+      gdh = -dv * 8 * arctan * width / (math.pi**2)
+      return [gdh, gdw], tf.gradients(v, variables, grad_ys=dv)
+
+    return v, _grad_v
+
+  return _get_grad_v(b2_height, b2_width)
+
+
+def _iou_per_anchor(pred_boxes: FloatType,
+                    target_boxes: FloatType,
+                    iou_type: Text = 'iou') -> tf.Tensor:
+  """Computing the IoU for a single anchor.
+
+  Args:
+    pred_boxes: predicted boxes, with coordinate [y_min, x_min, y_max, x_max].
+    target_boxes: target boxes, with coordinate [y_min, x_min, y_max, x_max].
+    iou_type: one of ['iou', 'ciou', 'diou', 'giou'].
+
+  Returns:
+    IoU float `Tensor` holding the selected IoU variant, not the loss.
+  """
+  # t_ denotes target boxes and p_ denotes predicted boxes.
+  t_ymin, t_xmin, t_ymax, t_xmax = target_boxes
+  p_ymin, p_xmin, p_ymax, p_xmax = pred_boxes
+
+  zero = tf.convert_to_tensor(0.0, t_ymin.dtype)
+  p_width = tf.maximum(zero, p_xmax - p_xmin)
+  p_height = tf.maximum(zero, p_ymax - p_ymin)
+  t_width = tf.maximum(zero, t_xmax - t_xmin)
+  t_height = tf.maximum(zero, t_ymax - t_ymin)
+  p_area = p_width * p_height
+  t_area = t_width * t_height
+
+  intersect_ymin = tf.maximum(p_ymin, t_ymin)
+  intersect_xmin = tf.maximum(p_xmin, t_xmin)
+  intersect_ymax = tf.minimum(p_ymax, t_ymax)
+  intersect_xmax = tf.minimum(p_xmax, t_xmax)
+  intersect_width = tf.maximum(zero, intersect_xmax - intersect_xmin)
+  intersect_height = tf.maximum(zero, intersect_ymax - intersect_ymin)
+  intersect_area = intersect_width * intersect_height
+
+  union_area = p_area + t_area - intersect_area
+  iou_v = tf.math.divide_no_nan(intersect_area, union_area)
+  if iou_type == 'iou':
+    return iou_v  # iou is the simplest form.
+
+  enclose_ymin = tf.minimum(p_ymin, t_ymin)
+  enclose_xmin = tf.minimum(p_xmin, t_xmin)
+  enclose_ymax = tf.maximum(p_ymax, t_ymax)
+  enclose_xmax = tf.maximum(p_xmax, t_xmax)
+
+  assert iou_type in ('giou', 'diou', 'ciou')
+  if iou_type == 'giou':  # giou is the generalized iou.
+    enclose_width = tf.maximum(zero, enclose_xmax - enclose_xmin)
+    enclose_height = tf.maximum(zero, enclose_ymax - enclose_ymin)
+    enclose_area = enclose_width * enclose_height
+    giou_v = iou_v - tf.math.divide_no_nan(
+        (enclose_area - union_area), enclose_area)
+    return giou_v
+
+  assert iou_type in ('diou', 'ciou')
+  p_center = tf.stack([(p_ymin + p_ymax) / 2, (p_xmin + p_xmax) / 2])
+  t_center = tf.stack([(t_ymin + t_ymax) / 2, (t_xmin + t_xmax) / 2])
+  euclidean = tf.linalg.norm(t_center - p_center)
+  diag_length = tf.linalg.norm(
+      [enclose_ymax - enclose_ymin, enclose_xmax - enclose_xmin])
+  diou_v = iou_v - tf.math.divide_no_nan(euclidean**2, diag_length**2)
+  if iou_type == 'diou':  # diou is the distance iou.
+    return diou_v
+
+  assert iou_type == 'ciou'
+  v = _get_v(p_height, p_width, t_height, t_width)
+  alpha = tf.math.divide_no_nan(v, ((1 - iou_v) + v))
+  return diou_v - alpha * v  # the last one is ciou.
+
+
+def iou_loss(pred_boxes: FloatType,
+             target_boxes: FloatType,
+             iou_type: Text = 'iou') -> tf.Tensor:
+  """A unified interface for computing various IoU losses.
+
+  Let B denote the predicted box and B_gt the target box (ground truth):
+
+    IoU = |B & B_gt| / |B | B_gt|
+
+    GIoU = IoU - |C - B U B_gt| / C, where C is the smallest box covering B and
+    B_gt.
+
+    DIoU = IoU - E(B, B_gt)^2 / c^2, E is the Euclidean distance of the center
+    points of B and B_gt, and c is the diagonal length of the smallest box
+    covering the two boxes
+
+    CIoU = DIoU - a * v, where a is a positive trade-off parameter, and
+    v measures the consistency of aspect ratio:
+      v = (arctan(w_gt / h_gt) - arctan(w / h))^2 * 4 / pi^2
+    where (w_gt, h_gt) and (w, h) are the width and height of the target and
+    predicted box respectively.
+
+  The returned loss is computed as 1 - one of {IoU, GIoU, DIoU, CIoU}.
+
+  Args:
+    pred_boxes: predicted boxes, with coordinate [y_min, x_min, y_max, x_max]*.
+      It can be multiple anchors, with each anchor box having four coordinates.
+    target_boxes: target boxes, with coordinate [y_min, x_min, y_max, x_max]*.
+      It can be multiple anchors, with each anchor box having four coordinates.
+    iou_type: one of ['iou', 'ciou', 'diou', 'giou'].
+
+  Returns:
+    IoU loss float `Tensor`.
+  """
+  if iou_type not in ('iou', 'ciou', 'diou', 'giou'):
+    raise ValueError(
+        'Unknown loss_type {}, not iou/ciou/diou/giou'.format(iou_type))
+
+  pred_boxes = tf.convert_to_tensor(pred_boxes, tf.float32)
+  target_boxes = tf.cast(target_boxes, pred_boxes.dtype)
+
+  # Unstack the last axis into coordinates: (y_min, x_min, y_max, x_max)*.
+  pred_boxes_list = tf.unstack(pred_boxes, None, axis=-1)
+  target_boxes_list = tf.unstack(target_boxes, None, axis=-1)
+  assert len(pred_boxes_list) == len(target_boxes_list)
+  assert len(pred_boxes_list) % 4 == 0
+
+  iou_loss_list = []
+  for i in range(0, len(pred_boxes_list), 4):
+    pred_boxes = pred_boxes_list[i: i + 4]
+    target_boxes = target_boxes_list[i: i + 4]
+
+    # Compute mask.
+    t_ymin, t_xmin, t_ymax, t_xmax = target_boxes
+    mask = tf.not_equal((t_ymax - t_ymin) * (t_xmax - t_xmin), 0)
+    mask = tf.cast(mask, t_ymin.dtype)
+    # Loss should be mask * (1 - iou) = mask - masked_iou.
+    pred_boxes = [b * mask for b in pred_boxes]
+    iou_loss_list.append(
+        mask - tf.squeeze(_iou_per_anchor(pred_boxes, target_boxes, iou_type)))
+  if len(iou_loss_list) == 1:
+    return iou_loss_list[0]
+  return tf.reduce_sum(tf.stack(iou_loss_list), 0)
+
diff --git a/efficientdet/iou_utils_test.py b/efficientdet/iou_utils_test.py
@@ -0,0 +1,74 @@
+# Copyright 2020 Google Research. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Tests for iou_utils."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.compat.v1 as tf
+import iou_utils
+
+
+class IouUtilsTest(tf.test.TestCase):
+  """IoU test class."""
+
+  def setUp(self):
+    super(IouUtilsTest, self).setUp()
+    self.pb = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
+                          dtype=tf.float32)
+    self.tb = tf.constant(
+        [[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]], dtype=tf.float32)
+    self.zeros = tf.constant([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=tf.float32)
+
+  def test_iou(self):
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.tb, 'iou'), [0.875, 1.])
+
+  def test_ciou(self):
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.tb, 'ciou'), [1.408893, 1.548753])
+
+  def test_diou(self):
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.tb, 'diou'), [1.406532, 1.531532])
+
+  def test_giou(self):
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.tb, 'giou'), [1.075000, 1.933333])
+
+  def test_iou_zero_target(self):
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.zeros, 'iou'), [0.0, 0.0])
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.zeros, 'ciou'), [0.0, 0.0])
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.zeros, 'diou'), [0.0, 0.0])
+    self.assertAllClose(
+        iou_utils.iou_loss(self.pb, self.zeros, 'giou'), [0.0, 0.0])
+
+  def test_iou_multiple_anchors(self):
+    pb = tf.tile(self.pb, [1, 2])
+    tb = tf.tile(self.tb, [1, 2])
+    self.assertAllClose(iou_utils.iou_loss(pb, tb, 'iou'), [1.75, 2.0])
+
+  def test_iou_multiple_anchors_mixed(self):
+    pb = tf.concat([self.pb, self.zeros], axis=-1)
+    tb = tf.concat([self.tb, self.zeros], axis=-1)
+    self.assertAllClose(iou_utils.iou_loss(pb, tb, 'iou'), [0.875, 1.0])
+
+
+if __name__ == '__main__':
+  tf.test.main()