Skip to content

Commit 5a08ff8

Browse files
Internal change
PiperOrigin-RevId: 531182533
1 parent 03083cd commit 5a08ff8

File tree

1 file changed

+53
-13
lines changed

1 file changed

+53
-13
lines changed

official/vision/dataloaders/retinanet_input.py

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
into (image, labels) tuple for RetinaNet.
1919
"""
2020

21+
from typing import Optional
22+
2123
# Import libraries
24+
2225
from absl import logging
2326
import tensorflow as tf
2427

@@ -51,6 +54,7 @@ def __init__(self,
5154
skip_crowd_during_training=True,
5255
max_num_instances=100,
5356
dtype='bfloat16',
57+
resize_first: Optional[bool] = None,
5458
mode=None):
5559
"""Initializes parameters for parsing annotations in the dataset.
5660
@@ -91,6 +95,8 @@ def __init__(self,
9195
max_num_instances: `int` number of maximum number of instances in an
9296
image. The groundtruth data will be padded to `max_num_instances`.
9397
dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
98+
resize_first: Optional `bool`; if True, resizes the image before applying
99+
augmentations, which is computationally more efficient.
94100
mode: a ModeKeys. Specifies if this is training, evaluation, prediction or
95101
prediction with ground-truths in the outputs.
96102
"""
@@ -141,6 +147,30 @@ def __init__(self,
141147
# Data type.
142148
self._dtype = dtype
143149

150+
# Input pipeline optimization.
151+
self._resize_first = resize_first
152+
153+
def _resize_and_crop_image_and_boxes(self, image, boxes, pad=True):
154+
"""Resizes and crops image and boxes, optionally with padding."""
155+
# Resizes and crops image.
156+
padded_size = None
157+
if pad:
158+
padded_size = preprocess_ops.compute_padded_size(self._output_size,
159+
2**self._max_level)
160+
image, image_info = preprocess_ops.resize_and_crop_image(
161+
image,
162+
self._output_size,
163+
padded_size=padded_size,
164+
aug_scale_min=self._aug_scale_min,
165+
aug_scale_max=self._aug_scale_max)
166+
167+
# Resizes and crops boxes.
168+
image_scale = image_info[2, :]
169+
offset = image_info[3, :]
170+
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
171+
image_info[1, :], offset)
172+
return image, boxes, image_info
173+
144174
def _parse_train_data(self, data, anchor_labeler=None):
145175
"""Parses data for training and evaluation."""
146176
classes = data['groundtruth_classes']
@@ -165,6 +195,21 @@ def _parse_train_data(self, data, anchor_labeler=None):
165195

166196
# Gets original image.
167197
image = data['image']
198+
image_size = tf.cast(tf.shape(image)[0:2], tf.float32)
199+
200+
less_output_pixels = (
201+
self._output_size[0] * self._output_size[1]
202+
) < image_size[0] * image_size[1]
203+
204+
# Resizing first can reduce augmentation computation if the original image
205+
# has more pixels than the desired output image.
206+
# There might be a smarter threshold to compute less_output_pixels as
207+
# we keep the padding to the very end, i.e., a resized image likely has fewer
208+
# pixels than self._output_size[0] * self._output_size[1].
209+
resize_first = self._resize_first and less_output_pixels
210+
if resize_first:
211+
image, boxes, image_info = self._resize_and_crop_image_and_boxes(
212+
image, boxes, pad=False)
168213

169214
# Apply autoaug or randaug.
170215
if self._augmenter is not None:
@@ -181,21 +226,16 @@ def _parse_train_data(self, data, anchor_labeler=None):
181226
# Converts boxes from normalized coordinates to pixel coordinates.
182227
boxes = box_ops.denormalize_boxes(boxes, image_shape)
183228

184-
# Resizes and crops image.
185-
image, image_info = preprocess_ops.resize_and_crop_image(
186-
image,
187-
self._output_size,
188-
padded_size=preprocess_ops.compute_padded_size(self._output_size,
189-
2**self._max_level),
190-
aug_scale_min=self._aug_scale_min,
191-
aug_scale_max=self._aug_scale_max)
229+
if not resize_first:
230+
image, boxes, image_info = self._resize_and_crop_image_and_boxes(
231+
image, boxes, pad=True)
232+
else:
233+
padded_size = preprocess_ops.compute_padded_size(self._output_size,
234+
2**self._max_level)
235+
image = tf.image.pad_to_bounding_box(
236+
image, 0, 0, padded_size[0], padded_size[1])
192237
image_height, image_width, _ = image.get_shape().as_list()
193238

194-
# Resizes and crops boxes.
195-
image_scale = image_info[2, :]
196-
offset = image_info[3, :]
197-
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
198-
image_info[1, :], offset)
199239
# Filters out ground-truth boxes that are all zeros.
200240
indices = box_ops.get_non_empty_box_indices(boxes)
201241
boxes = tf.gather(boxes, indices)

0 commit comments

Comments
 (0)