Add the test split to the TAO dataset.

The TensorFlow Datasets Authors · The TensorFlow Datasets Authors · commit 2638dc260dbd · 2024-10-03T03:02:17.000-07:00
PiperOrigin-RevId: 681794893
diff --git a/tensorflow_datasets/video/tao/dummy_data/annotations-1.2/test_without_annotations.json b/tensorflow_datasets/video/tao/dummy_data/annotations-1.2/test_without_annotations.json
diff --git a/tensorflow_datasets/video/tao/tao.py b/tensorflow_datasets/video/tao/tao.py
@@ -101,6 +101,7 @@ def _maybe_prepare_manual_data(
   manually_downloaded_files = [
       '1_AVA_HACS_TRAIN_*.zip',
       '2_AVA_HACS_VAL_*.zip',
+      '3_AVA_HACS_TEST_*.zip',
   ]
   files = []
   for file in manually_downloaded_files:
@@ -282,7 +283,7 @@ class Tao(tfds.core.BeamBasedBuilder):
   ]
   VERSION = tfds.core.Version('1.0.0')
   RELEASE_NOTES = {
-      '1.0.0': 'Initial release.',
+      '1.1.0': 'Added test split.',
   }
 
   def _info(self) -> tfds.core.DatasetInfo:
@@ -336,6 +337,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     data = dl_manager.download_and_extract({
         'train': _VIDEO_URL + '1-TAO_TRAIN.zip',
         'val': _VIDEO_URL + '2-TAO_VAL.zip',
+        'test': _VIDEO_URL + '3-TAO_TEST.zip',
         'annotations': _ANNOTATIONS_URL,
     })
 
@@ -359,6 +361,14 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
             / 'validation.json',
             id_map=id_map,
         ),
+        tfds.Split.TEST: self._generate_examples(
+            data_path=data['test'],
+            manual_path=None,
+            annotations_path=data['annotations']
+            / 'annotations-1.2'
+            / 'test_without_annotations.json',
+            id_map=id_map,
+        ),
     }
 
   def _maybe_resize_video(self, frames_list):
diff --git a/tensorflow_datasets/video/tao/tao_test.py b/tensorflow_datasets/video/tao/tao_test.py
@@ -27,10 +27,12 @@ class TaoTest(tfds.testing.DatasetBuilderTestCase):
   SPLITS = {
       tfds.Split.TRAIN: 1,
       tfds.Split.VALIDATION: 1,
+      tfds.Split.TEST: 1,
   }
   DL_EXTRACT_RESULT = {
       'train': '',
       'val': '',
+      'test': '',
       'annotations': '',
   }
 
@@ -58,7 +60,8 @@ def _download_and_prepare_as_dataset(self, builder):
       splits = builder.as_dataset()
       train_ex = list(splits[tfds.Split.TRAIN])[0]
       val_ex = list(splits[tfds.Split.VALIDATION])[0]
-      for ex in [train_ex, val_ex]:
+      test_ex = list(splits[tfds.Split.TEST])[0]
+      for ex in [train_ex, val_ex, test_ex]:
         # There should be the same number of each of these; a number
         # per group of bboxes indicating which frame they correspond to.
         self.assertEqual(
@@ -69,10 +72,12 @@ def _download_and_prepare_as_dataset(self, builder):
       splits = builder.as_dataset()
       train_ex = list(splits[tfds.Split.TRAIN])[0]
       val_ex = list(splits[tfds.Split.VALIDATION])[0]
+      test_ex = list(splits[tfds.Split.TEST])[0]
       # NOTE: For real images, this will be a list of potentially a thousand or
       # more frames. For testing purposes we load a single dummy 10 X 10 image.
       self.assertEqual(train_ex['video'].shape, (1, 28, 42, 3))
       self.assertEqual(val_ex['video'].shape, (1, 28, 42, 3))
+      self.assertEqual(test_ex['video'].shape, (1, 28, 42, 3))
 
 
 if __name__ == '__main__':