Skip to content

Commit 4f36b5a

Browse files
Sagar Waghmare (sagarwaghmare69)
authored and committed
Support frame annotation type feature.
PiperOrigin-RevId: 604811120
1 parent c84ac8c commit 4f36b5a

File tree

5 files changed

+127
-31
lines changed

5 files changed

+127
-31
lines changed

sanpo_dataset/lib/common.py

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import json
2222
import os
2323
import pathlib
24-
from typing import Any, Iterator, List, Mapping, Optional, Tuple, Union
24+
from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple, Union
2525

2626
import numpy as np
2727

@@ -48,6 +48,9 @@
4848
IMAGE_MASK_FILENAME_EXTENSION = '.png'
4949
DEPTH_FILENAME_EXTENSION = '.npz'
5050
CAMERA_POSES_CSV_FILENAME = 'camera_poses.csv'
51+
FRAME_SEGMENTATION_ANNOTATION_TYPE_FILENAME = (
52+
'frame_segmentation_annotation_type.json'
53+
)
5154

5255

5356
FEATURE_SESSION_TYPE = 'session_type'
@@ -56,6 +59,7 @@
5659
FEATURE_IMAGE_RIGHT = 'image_right'
5760
FEATURE_METRIC_DEPTH_LABEL = 'metric_depth'
5861
FEATURE_HAS_METRIC_DEPTH_LABEL = 'has_metric_depth'
62+
FEATURE_SEGMENTATION_ANNOTATION_TYPE = 'segmentation_annotation_type'
5963
FEATURE_PANOPTIC_MASK_LABEL = 'panoptic_label'
6064
FEATURE_HAS_PANOPTIC_MASK_LABEL = 'has_panoptic_label'
6165
FEATURE_SEMANTIC_LABEL = 'semantic_label'
@@ -102,7 +106,6 @@ def wrapped_path(path: Any) -> Any:
102106

103107

104108
class DatasetViewMode(enum.Enum):
105-
106109
"""Configures which data to include in each sample.
107110
108111
STEREO_VIEW_FRAME_MODE: Stereo view in frame mode. Each sample contains both
@@ -433,10 +436,42 @@ def has_segmentation_annotation(self) -> bool:
433436
return True
434437
return False
435438

436-
def lens_names(self, sensor_name: str) -> List[str]:
439+
def segmentation_annotation_type(
440+
self, input_sensor_name: str
441+
) -> Optional[Dict[int, str]]:
442+
"""Returns segmentation annotation type for the given sensor."""
443+
# Computed once.
444+
if not hasattr(self, '_sensor_frame_segmentation_annotation_type'):
445+
self._sensor_frame_segmentation_annotation_type = {}
446+
for sensor_name in self.sensor_names:
447+
frame_segmentation_annotation_filepath = (
448+
self.base_path
449+
/ sensor_name
450+
/ LEFT_LENS_NAME
451+
/ FRAME_SEGMENTATION_ANNOTATION_TYPE_FILENAME
452+
)
453+
if frame_segmentation_annotation_filepath.exists():
454+
with wrapped_open(
455+
frame_segmentation_annotation_filepath, 'r'
456+
) as fileptr:
457+
frame_segmentation_annotation_type = json.load(fileptr)
458+
self._sensor_frame_segmentation_annotation_type[sensor_name] = {}
459+
for (
460+
frame_num_str,
461+
annotation_type,
462+
) in frame_segmentation_annotation_type.items():
463+
self._sensor_frame_segmentation_annotation_type[sensor_name][
464+
int(frame_num_str)
465+
] = annotation_type
466+
else:
467+
self._sensor_frame_segmentation_annotation_type[sensor_name] = None
468+
469+
return self._sensor_frame_segmentation_annotation_type[input_sensor_name]
470+
471+
def lens_names(self, input_sensor_name: str) -> List[str]:
437472
"""Returns lens names in the session's sensor."""
438473

439-
# Just compute once.
474+
# Computed once.
440475
if not hasattr(self, '_sensor_lens_names'):
441476
self._sensor_lens_names = {}
442477
for sensor_name in self.sensor_names:
@@ -448,10 +483,10 @@ def lens_names(self, sensor_name: str) -> List[str]:
448483
lens_names.sort()
449484
self._sensor_lens_names[sensor_name] = lens_names
450485

451-
return self._sensor_lens_names[sensor_name]
486+
return self._sensor_lens_names[input_sensor_name]
452487

453488
def camera_poses(
454-
self, sensor_name: str
489+
self, input_sensor_name: str
455490
) -> List[Mapping[str, Union[bool, np.ndarray]]]:
456491
"""Returns camera poses corresponding the session's sensor."""
457492

@@ -465,7 +500,7 @@ def camera_poses(
465500
csv_file
466501
)
467502

468-
return self._sensor_camera_poses[sensor_name]
503+
return self._sensor_camera_poses[input_sensor_name]
469504

470505
def camera_intrinsics(
471506
self, sensor_name: str, lens_name: str
@@ -657,6 +692,9 @@ def _itersamples(
657692
else ''
658693
)
659694
sample[FEATURE_HAS_PANOPTIC_MASK_LABEL] = segmentation_mask_exists
695+
sample[FEATURE_SEGMENTATION_ANNOTATION_TYPE] = (
696+
ex.segmentation_annotation_type
697+
)
660698

661699
if self.config.feature_metric_depth_zed.to_include():
662700
metric_zed_depth_filename = ex.metric_depth_zed_filename(
@@ -714,6 +752,16 @@ def has_metric_depth_zed(self):
714752
def has_segmentation_mask(self):
715753
return self.segmentation_mask_filename(LEFT_LENS_NAME).exists()
716754

755+
@functools.cached_property
756+
def segmentation_annotation_type(self) -> str:
757+
segmentation_annotation_type = self.session.segmentation_annotation_type(
758+
self.sensor_name
759+
)
760+
if segmentation_annotation_type is None:
761+
return 'NA'
762+
else:
763+
return segmentation_annotation_type[self.frame_num]
764+
717765
@property
718766
def has_camera_pose(self):
719767
# all examples have some camera pose

sanpo_dataset/lib/common_test.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323

2424
FLAGS = flags.FLAGS
2525

26-
_REAL_SESSIONS_PATH = 'third_party/py/sanpo_dataset/lib/testdata/sanpo-real'
26+
_REAL_SESSIONS_PATH = (
27+
'sanpo_dataset/sanpo_dataset/lib/testdata/sanpo-real'
28+
)
2729
_SYNTHETIC_SESSIONS_PATH = (
28-
'third_party/py/sanpo_dataset/lib/testdata/sanpo-synthetic'
30+
'sanpo_dataset/sanpo_dataset/lib/testdata/sanpo-synthetic'
2931
)
3032
_REAL_SESSION_NAME = 'real_session'
3133
_SYNTHETIC_SESSION_NAME = 'synthetic_session'
@@ -45,19 +47,17 @@ def setUp(self):
4547
super().setUp()
4648

4749
self.real_sessions_base_dir = os.path.join(
48-
FLAGS.test_srcdir, 'goo''gle3', _REAL_SESSIONS_PATH
50+
FLAGS.test_srcdir, _REAL_SESSIONS_PATH
4951
)
5052
self.real_session_dir = os.path.join(
51-
FLAGS.test_srcdir, 'goo''gle3', _REAL_SESSIONS_PATH, _REAL_SESSION_NAME
53+
FLAGS.test_srcdir, _REAL_SESSIONS_PATH, _REAL_SESSION_NAME
5254
)
5355
self.synthetic_sessions_base_dir = os.path.join(
5456
FLAGS.test_srcdir,
55-
'goo''gle3',
5657
_SYNTHETIC_SESSIONS_PATH,
5758
)
5859
self.synthetic_session_dir = os.path.join(
5960
FLAGS.test_srcdir,
60-
'goo''gle3',
6161
_SYNTHETIC_SESSIONS_PATH,
6262
_SYNTHETIC_SESSION_NAME,
6363
)
@@ -133,6 +133,15 @@ def test_sanpo_real_session(self):
133133
else _N_REAL_CAMERA_HEAD_FRAMES
134134
),
135135
)
136+
segmentation_annotation_type = sanpo_session.segmentation_annotation_type(
137+
sensor_name
138+
)
139+
if sensor_name == 'camera_chest':
140+
# This session->sensor_name has 73 frames. There must be annotation type
141+
# for each of them.
142+
self.assertLen(segmentation_annotation_type, 73)
143+
else:
144+
self.assertIsNone(segmentation_annotation_type)
136145

137146
def test_frame_example(self):
138147
sanpo_session = common.SanpoSession(
@@ -147,6 +156,9 @@ def test_frame_example(self):
147156
self.assertTrue(frame_example.has_right_lens)
148157
if sensor_name == 'camera_chest':
149158
self.assertTrue(frame_example.has_segmentation_mask)
159+
self.assertEqual(
160+
frame_example.segmentation_annotation_type, 'HUMAN_ANNOTATED'
161+
)
150162
else:
151163
self.assertFalse(frame_example.has_segmentation_mask)
152164

@@ -164,6 +176,7 @@ def test_synthetic_frame_example(self):
164176
self.assertFalse(frame_example.has_metric_depth_zed)
165177
self.assertFalse(frame_example.has_right_lens)
166178
self.assertTrue(frame_example.has_segmentation_mask)
179+
self.assertEqual(frame_example.segmentation_annotation_type, 'SYNTHETIC')
167180

168181
def test_sanpo_session_list(self):
169182
session_ids_file = tempfile.mktemp()
@@ -197,7 +210,7 @@ def test_all_frame_iter_samples_panoptic_frame_mode(self):
197210
)
198211
count = 0
199212
for example in sanpo_session.all_frame_itersamples():
200-
self.assertLen(example, 7)
213+
self.assertLen(example, 8)
201214
count += 1
202215
self.assertEqual(count, _N_REAL_CAMERA_CHEST_FRAMES)
203216

@@ -210,7 +223,7 @@ def test_all_frame_iter_samples_all_optional_frame_mode(self):
210223
sanpo_session = common.SanpoSession(self.real_session_dir, config)
211224
count = 0
212225
for example in sanpo_session.all_frame_itersamples():
213-
self.assertLen(example, 14)
226+
self.assertLen(example, 15)
214227
count += 1
215228
self.assertEqual(
216229
count, _N_REAL_CAMERA_CHEST_FRAMES + _N_REAL_CAMERA_HEAD_FRAMES

sanpo_dataset/lib/tensorflow_dataset.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import pathlib
2323
import random
2424
from typing import Any, Iterator, Mapping, Optional, Tuple, Union
25+
import warnings
2526

2627
import numpy as np
2728
from sanpo_dataset.lib import common
@@ -60,16 +61,15 @@ def __init__(
6061
f'[{target_h}, {target_w}] which looks like [width,height].'
6162
)
6263
if abs(target_w * 9 / 16 - target_h) > 1:
63-
raise ValueError(
64+
warnings.warn(
6465
f'The target shape [{target_h},{target_w}] aspect ratio must be'
65-
f' 16:9. Consider setting a target_shape of either [{target_h},'
66-
f' {int(target_h*16/9)}] or [{int(target_w*9/16)}, {target_w}],'
67-
' which would preserve the image aspect ratio.\n\nSANPO does not'
68-
' perform cropping or color augmentation for you because'
69-
' preprocessing strategies can vary by application.'
66+
' 16:9 or else camera intrinsics will be incorrect at the ratio'
67+
' you requested.'
7068
# TODO(kwilber): add a crop tool and uncomment the below lines
7169
# f'To crop the image, you can use the `common.crop_*` '
7270
# f'family of functions which properly adjust camera intrinsics.'
71+
,
72+
UserWarning,
7373
)
7474

7575
# TODO(kwilber): Verify the config.
@@ -177,6 +177,9 @@ def _get_tensor_signature(self) -> Mapping[str, tf.TensorSpec]:
177177
signature[common.FEATURE_HAS_PANOPTIC_MASK_LABEL] = tf.TensorSpec(
178178
shape=(), dtype=tf.bool
179179
)
180+
signature[common.FEATURE_SEGMENTATION_ANNOTATION_TYPE] = tf.TensorSpec(
181+
shape=(), dtype=tf.string
182+
)
180183

181184
if self.builder_config.feature_camera_pose.to_include():
182185
signature[common.FEATURE_TRACKING_STATE] = tf.TensorSpec(
@@ -202,7 +205,9 @@ def _maybe_resize(
202205
resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
203206
else:
204207
resize_method = tf.image.ResizeMethod.BILINEAR
205-
return tf.image.resize(tensor, [target_h, target_w], method=resize_method)
208+
return tf.image.resize_with_pad(
209+
tensor, target_h, target_w, method=resize_method
210+
)
206211

207212
def _tf_decode_image(self, filename: tf.Tensor) -> tf.Tensor:
208213
# can't use tf.io.decode_image here because
@@ -283,6 +288,9 @@ def _tf_load_panoptic_labels(
283288
common.FEATURE_HAS_PANOPTIC_MASK_LABEL: tf.convert_to_tensor(
284289
features[common.FEATURE_HAS_PANOPTIC_MASK_LABEL]
285290
),
291+
common.FEATURE_SEGMENTATION_ANNOTATION_TYPE: tf.convert_to_tensor(
292+
features[common.FEATURE_SEGMENTATION_ANNOTATION_TYPE]
293+
),
286294
}
287295

288296
return {}

0 commit comments

Comments (0)