add augmix (#362)

fsx950223 · web-flow · commit 15ebc2d9dbc9 · 2020-05-06T20:28:55.000-07:00
* add augmix

* fix params
diff --git a/efficientdet/aug/autoaugment.py b/efficientdet/aug/autoaugment.py
@@ -26,7 +26,9 @@
 import math
 from absl import logging
 import tensorflow.compat.v1 as tf
+import tensorflow_probability as tfp
 import hparams_config
+import numpy as np
 
 try:
   # addon image_ops are simpler, but they have some issues on GPU and TPU.
@@ -1545,9 +1547,30 @@ def select_and_apply_random_policy(policies, image, bboxes):
         lambda: (image, bboxes))
   return (image, bboxes)
 
+def select_and_apply_random_policy_augmix(policies, image, bboxes, mixture_width=3, mixture_depth=-1, alpha=1):
+  """Select a random policy from `policies` and apply it to `image`."""
+  policy_to_select = tf.random_uniform([], maxval=len(policies), dtype=tf.int32)
+  # Note that using tf.case instead of tf.conds would result in significantly
+  # larger graphs and would even break export for some larger policies.
+  ws = tfp.distributions.Dirichlet([alpha] * mixture_width).sample()
+  m = tfp.distributions.Beta(alpha, alpha).sample()
+  mix = tf.zeros_like(image, dtype=tf.float32)
+  for j in range(mixture_width):
+    aug_image = image
+    depth = mixture_depth if mixture_depth > 0 else np.random.randint(1, 4)
+    for _ in range(depth):
+      for (i, policy) in enumerate(policies):
+        aug_image, bboxes = tf.cond(
+            tf.equal(i, policy_to_select),
+            lambda selected_policy=policy: selected_policy(aug_image, bboxes),
+            lambda: (aug_image, bboxes))
+    mix += ws[j] * tf.cast(aug_image, tf.float32)
+  mixed = tf.cast((1 - m) * tf.cast(image, tf.float32) + m * mix, tf.uint8)
+  return (mixed, bboxes)
 
 def build_and_apply_nas_policy(policies, image, bboxes,
-                               augmentation_hparams):
+                               augmentation_hparams, use_augmix=False,
+                               mixture_width=3, mixture_depth=-1, alpha=1):
   """Build a policy from the given policies passed in and apply to image.
 
   Args:
@@ -1559,6 +1582,11 @@ def build_and_apply_nas_policy(policies, image, bboxes,
     bboxes: tf.Tensor of shape [N, 4] representing ground truth boxes that are
       normalized between [0, 1].
     augmentation_hparams: Hparams associated with the NAS learned policy.
+    use_augmix: whether use augmix[https://arxiv.org/pdf/1912.02781.pdf]
+    width: Width of augmentation chain
+    depth: Depth of augmentation chain. -1 enables stochastic depth uniformly
+      from [1, 3]
+    alpha: Probability coefficient for Beta and Dirichlet distributions.
 
   Returns:
     A version of image that now has data augmentation applied to it based on
@@ -1592,14 +1620,19 @@ def final_policy(image_, bboxes_):
         return image_, bboxes_
       return final_policy
     tf_policies.append(make_final_policy(tf_policy))
+  if use_augmix:
+    augmented_images, augmented_bboxes = select_and_apply_random_policy_augmix(
+        tf_policies, image, bboxes, mixture_width, mixture_depth, alpha)
+  else:
+    augmented_images, augmented_bboxes = select_and_apply_random_policy(
+        tf_policies, image, bboxes)
 
-  augmented_images, augmented_bboxes = select_and_apply_random_policy(
-      tf_policies, image, bboxes)
   # If no bounding boxes were specified, then just return the images.
   return (augmented_images, augmented_bboxes)
 
 
-def distort_image_with_autoaugment(image, bboxes, augmentation_name):
+def distort_image_with_autoaugment(image, bboxes, augmentation_name, use_augmix=False,
+                                   mixture_width=3, mixture_depth=-1, alpha=1):
   """Applies the AutoAugment policy to `image` and `bboxes`.
 
   Args:
@@ -1633,4 +1666,5 @@ def distort_image_with_autoaugment(image, bboxes, augmentation_name):
       cutout_bbox_const=50,
       translate_bbox_const=120))
 
-  return build_and_apply_nas_policy(policy, image, bboxes, augmentation_hparams)
+  return build_and_apply_nas_policy(policy, image, bboxes, augmentation_hparams,
+                                    use_augmix, mixture_width, mixture_depth, alpha)
diff --git a/efficientdet/aug/autoaugment_test.py b/efficientdet/aug/autoaugment_test.py
@@ -30,6 +30,7 @@ def test_autoaugment_policy(self):
     image = tf.placeholder(tf.uint8, shape=[640, 640, 3])
     bboxes = tf.placeholder(tf.float32, shape=[4, 4])
     autoaugment.distort_image_with_autoaugment(image, bboxes, 'test')
+    autoaugment.distort_image_with_autoaugment(image, bboxes, 'test', use_augmix=True)
 
 
 if __name__ == '__main__':
diff --git a/efficientdet/dataloader.py b/efficientdet/dataloader.py
@@ -277,7 +277,7 @@ def _dataset_parser(value):
         if params.get('autoaugment_policy', None) and self._is_training:
           from aug import autoaugment  # pylint: disable=g-import-not-at-top
           image, boxes = autoaugment.distort_image_with_autoaugment(
-              image, boxes, params['autoaugment_policy'])
+              image, boxes, params['autoaugment_policy'], params['use_augmix'], *params['augmix_params'])
 
         input_processor = DetectionInputProcessor(
             image, params['image_size'], boxes, classes)
diff --git a/efficientdet/hparams_config.py b/efficientdet/hparams_config.py
@@ -177,6 +177,9 @@ def default_detection_configs():
   h.train_scale_min = 0.1
   h.train_scale_max = 2.0
   h.autoaugment_policy = None
+  h.use_augmix = False
+  # mixture_width, mixture_depth, alpha
+  h.augmix_params = (3, -1, 1)
 
   # dataset specific parameters
   h.num_classes = 90