tensorflow
diff --git a/‎official/projects/detr/README.md‎
Lines changed: 46 additions & 0 deletions b/‎official/projects/detr/README.md‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎official/projects/detr/configs/detr.py‎
Lines changed: 103 additions & 0 deletions b/‎official/projects/detr/configs/detr.py‎
Lines changed: 103 additions & 0 deletions
diff --git a/‎official/projects/detr/configs/detr_test.py‎
Lines changed: 41 additions & 0 deletions b/‎official/projects/detr/configs/detr_test.py‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎official/projects/detr/dataloaders/coco.py‎
Lines changed: 157 additions & 0 deletions b/‎official/projects/detr/dataloaders/coco.py‎
Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,46 @@
+# End-to-End Object Detection with Transformers (DETR)
+
+[![DETR](https://img.shields.io/badge/DETR-arXiv.2005.12872-B3181B?)](https://arxiv.org/abs/2005.12872)
+
+TensorFlow 2 implementation of End-to-End Object Detection with Transformers.
+
+⚠️ Disclaimer: All datasets hyperlinked from this page are not owned or
+distributed by Google. These datasets are made available by third parties.
+Please review the terms and conditions made available by the third parties
+before using the data.
+
+## Scripts:
+
+You can find the scripts to reproduce the experiments listed below in
+`detr/experiments`.
+
+
+## DETR [COCO](https://cocodataset.org) ([ImageNet](https://www.image-net.org) pretrained)
+
+| Model     | Resolution | Batch size | Epochs | Decay@ | Params (M) | Box AP | Dashboard | Checkpoint | Experiment |
+| --------- | :--------: | ----------:| ------:| -----: | ---------: | -----: | --------: | ---------: | ---------: |
+| DETR-ResNet-50 | 1333x1333 |64|300| 200 |41 | 40.6 | [tensorboard](https://tensorboard.dev/experiment/o2IEZnniRYu6pqViBeopIg/#scalars) | [ckpt](https://storage.googleapis.com/tf_model_garden/vision/detr/detr_resnet_50_300.tar.gz) | detr_r50_300epochs.sh |
+| DETR-ResNet-50 | 1333x1333 |64|500| 400 |41 | 42.0| [tensorboard](https://tensorboard.dev/experiment/YFMDKpESR4yjocPh5HgfRw/) | [ckpt](https://storage.googleapis.com/tf_model_garden/vision/detr/detr_resnet_50_500.tar.gz) | detr_r50_500epochs.sh |
+| DETR-ResNet-50 | 1333x1333 |64|300| 200 |41 | 40.6 | paper | NA | NA |
+| DETR-ResNet-50 | 1333x1333 |64|500| 400 |41 | 42.0 | paper | NA | NA |
+| DETR-DC5-ResNet-50 | 1333x1333 |64|500| 400 |41 | 43.3 | paper | NA | NA |
+
+## Contributions needed:
+
+*   Add DC5 support and update experiment table.
+
+
+## Citing TensorFlow Model Garden
+
+If you find this codebase helpful in your research, please cite this repository.
+
+```
+@misc{tensorflowmodelgarden2020,
+  author = {Hongkun Yu and Chen Chen and Xianzhi Du and Yeqing Li and
+            Abdullah Rashwan and Le Hou and Pengchong Jin and Fan Yang and
+            Frederick Liu and Jaeyoun Kim and Jing Li},
+  title = {{TensorFlow Model Garden}},
+  howpublished = {\url{https://github.com/tensorflow/models}},
+  year = {2020}
+}
+```
@@ -0,0 +1,103 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""DETR configurations."""
+
+import dataclasses
+from official.core import config_definitions as cfg
+from official.core import exp_factory
+from official.projects.detr import optimization
+from official.projects.detr.dataloaders import coco
+
+
+@dataclasses.dataclass
+class DetectionConfig(cfg.TaskConfig):
+  """The translation task config."""
+  train_data: cfg.DataConfig = cfg.DataConfig()
+  validation_data: cfg.DataConfig = cfg.DataConfig()
+  lambda_cls: float = 1.0
+  lambda_box: float = 5.0
+  lambda_giou: float = 2.0
+
+  init_ckpt: str = ''
+  num_classes: int = 81  # 0: background
+  background_cls_weight: float = 0.1
+  num_encoder_layers: int = 6
+  num_decoder_layers: int = 6
+
+  # Make DETRConfig.
+  num_queries: int = 100
+  num_hidden: int = 256
+  per_category_metrics: bool = False
+
+
+@exp_factory.register_config_factory('detr_coco')
+def detr_coco() -> cfg.ExperimentConfig:
+  """Config to get results that matches the paper."""
+  train_batch_size = 64
+  eval_batch_size = 64
+  num_train_data = 118287
+  num_steps_per_epoch = num_train_data // train_batch_size
+  train_steps = 500 * num_steps_per_epoch  # 500 epochs
+  decay_at = train_steps - 100 * num_steps_per_epoch  # 400 epochs
+  config = cfg.ExperimentConfig(
+      task=DetectionConfig(
+          train_data=coco.COCODataConfig(
+              tfds_name='coco/2017',
+              tfds_split='train',
+              is_training=True,
+              global_batch_size=train_batch_size,
+              shuffle_buffer_size=1000,
+          ),
+          validation_data=coco.COCODataConfig(
+              tfds_name='coco/2017',
+              tfds_split='validation',
+              is_training=False,
+              global_batch_size=eval_batch_size,
+              drop_remainder=False
+          )
+      ),
+      trainer=cfg.TrainerConfig(
+          train_steps=train_steps,
+          validation_steps=-1,
+          steps_per_loop=10000,
+          summary_interval=10000,
+          checkpoint_interval=10000,
+          validation_interval=10000,
+          max_to_keep=1,
+          best_checkpoint_export_subdir='best_ckpt',
+          best_checkpoint_eval_metric='AP',
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'detr_adamw',
+                  'detr_adamw': {
+                      'weight_decay_rate': 1e-4,
+                      'global_clipnorm': 0.1,
+                      # Avoid AdamW legacy behavior.
+                      'gradient_clip_norm': 0.0
+                  }
+              },
+              'learning_rate': {
+                  'type': 'stepwise',
+                  'stepwise': {
+                      'boundaries': [decay_at],
+                      'values': [0.0001, 1.0e-05]
+                  }
+              },
+              })
+          ),
+      restrictions=[
+          'task.train_data.is_training != None',
+      ])
+  return config
@@ -0,0 +1,41 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for detr."""
+
+# pylint: disable=unused-import
+from absl.testing import parameterized
+import tensorflow as tf
+
+from official.core import config_definitions as cfg
+from official.core import exp_factory
+from official.projects.detr.configs import detr as exp_cfg
+from official.projects.detr.dataloaders import coco
+
+
+class DetrTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.parameters(('detr_coco',))
+  def test_detr_configs(self, config_name):
+    config = exp_factory.get_exp_config(config_name)
+    self.assertIsInstance(config, cfg.ExperimentConfig)
+    self.assertIsInstance(config.task, exp_cfg.DetectionConfig)
+    self.assertIsInstance(config.task.train_data, coco.COCODataConfig)
+    config.task.train_data.is_training = None
+    with self.assertRaises(KeyError):
+      config.validate()
+
+
+# Run the test cases through TensorFlow's test runner when executed directly.
+if __name__ == '__main__':
+  tf.test.main()
@@ -0,0 +1,157 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""COCO data loader for DETR."""
+
+import dataclasses
+from typing import Optional, Tuple
+import tensorflow as tf
+
+from official.core import config_definitions as cfg
+from official.core import input_reader
+from official.vision.beta.ops import box_ops
+from official.vision.beta.ops import preprocess_ops
+
+
+@dataclasses.dataclass
+class COCODataConfig(cfg.DataConfig):
+  """Data config for COCO."""
+  output_size: Tuple[int, int] = (1333, 1333)
+  max_num_boxes: int = 100
+  resize_scales: Tuple[int, ...] = (
+      480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+
+
+class COCODataLoader():
+  """A class to load dataset for COCO detection task."""
+
+  def __init__(self, params: COCODataConfig):
+    self._params = params
+
+  def preprocess(self, inputs):
+    """Preprocess COCO for DETR."""
+    image = inputs['image']
+    boxes = inputs['objects']['bbox']
+    classes = inputs['objects']['label'] + 1
+    is_crowd = inputs['objects']['is_crowd']
+
+    image = preprocess_ops.normalize_image(image)
+    if self._params.is_training:
+      image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)
+
+      do_crop = tf.greater(tf.random.uniform([]), 0.5)
+      if do_crop:
+        # Rescale
+        boxes = box_ops.denormalize_boxes(boxes, tf.shape(image)[:2])
+        index = tf.random.categorical(tf.zeros([1, 3]), 1)[0]
+        scales = tf.gather([400.0, 500.0, 600.0], index, axis=0)
+        short_side = scales[0]
+        image, image_info = preprocess_ops.resize_image(image, short_side)
+        boxes = preprocess_ops.resize_and_crop_boxes(boxes,
+                                                     image_info[2, :],
+                                                     image_info[1, :],
+                                                     image_info[3, :])
+        boxes = box_ops.normalize_boxes(boxes, image_info[1, :])
+
+        # Do croping
+        shape = tf.cast(image_info[1], dtype=tf.int32)
+        h = tf.random.uniform(
+            [], 384, tf.math.minimum(shape[0], 600), dtype=tf.int32)
+        w = tf.random.uniform(
+            [], 384, tf.math.minimum(shape[1], 600), dtype=tf.int32)
+        i = tf.random.uniform([], 0, shape[0] - h + 1, dtype=tf.int32)
+        j = tf.random.uniform([], 0, shape[1] - w + 1, dtype=tf.int32)
+        image = tf.image.crop_to_bounding_box(image, i, j, h, w)
+        boxes = tf.clip_by_value(
+            (boxes[..., :] * tf.cast(
+                tf.stack([shape[0], shape[1], shape[0], shape[1]]),
+                dtype=tf.float32) -
+             tf.cast(tf.stack([i, j, i, j]), dtype=tf.float32)) /
+            tf.cast(tf.stack([h, w, h, w]), dtype=tf.float32), 0.0, 1.0)
+      scales = tf.constant(
+          self._params.resize_scales,
+          dtype=tf.float32)
+      index = tf.random.categorical(tf.zeros([1, 11]), 1)[0]
+      scales = tf.gather(scales, index, axis=0)
+    else:
+      scales = tf.constant([self._params.resize_scales[-1]], tf.float32)
+
+    image_shape = tf.shape(image)[:2]
+    boxes = box_ops.denormalize_boxes(boxes, image_shape)
+    gt_boxes = boxes
+    short_side = scales[0]
+    image, image_info = preprocess_ops.resize_image(
+        image,
+        short_side,
+        max(self._params.output_size))
+    boxes = preprocess_ops.resize_and_crop_boxes(boxes,
+                                                 image_info[2, :],
+                                                 image_info[1, :],
+                                                 image_info[3, :])
+    boxes = box_ops.normalize_boxes(boxes, image_info[1, :])
+
+    # Filters out ground truth boxes that are all zeros.
+    indices = box_ops.get_non_empty_box_indices(boxes)
+    boxes = tf.gather(boxes, indices)
+    classes = tf.gather(classes, indices)
+    is_crowd = tf.gather(is_crowd, indices)
+    boxes = box_ops.yxyx_to_cycxhw(boxes)
+
+    image = tf.image.pad_to_bounding_box(
+        image, 0, 0, self._params.output_size[0], self._params.output_size[1])
+    labels = {
+        'classes':
+            preprocess_ops.clip_or_pad_to_fixed_size(
+                classes, self._params.max_num_boxes),
+        'boxes':
+            preprocess_ops.clip_or_pad_to_fixed_size(
+                boxes, self._params.max_num_boxes)
+    }
+    if not self._params.is_training:
+      labels.update({
+          'id':
+              inputs['image/id'],
+          'image_info':
+              image_info,
+          'is_crowd':
+              preprocess_ops.clip_or_pad_to_fixed_size(
+                  is_crowd, self._params.max_num_boxes),
+          'gt_boxes':
+              preprocess_ops.clip_or_pad_to_fixed_size(
+                  gt_boxes, self._params.max_num_boxes),
+      })
+
+    return image, labels
+
+  def _transform_and_batch_fn(
+      self,
+      dataset,
+      input_context: Optional[tf.distribute.InputContext] = None):
+    """Preprocess and batch."""
+    dataset = dataset.map(
+        self.preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+    per_replica_batch_size = input_context.get_per_replica_batch_size(
+        self._params.global_batch_size
+    ) if input_context else self._params.global_batch_size
+    dataset = dataset.batch(
+        per_replica_batch_size, drop_remainder=self._params.is_training)
+    return dataset
+
+  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
+    """Returns a tf.dataset.Dataset."""
+    reader = input_reader.InputReader(
+        params=self._params,
+        decoder_fn=None,
+        transform_and_batch_fn=self._transform_and_batch_fn)
+    return reader.read(input_context)