@@ -39,8 +39,6 @@ def to_tensor(data):
3939 "`Sequence`, `int` and `float`"
4040 )
4141
42-
43- @TRANSFORMS .register_module ()
4442class PackDetInputs (BaseTransform ):
4543 """Pack the inputs data for the detection / semantic segmentation /
4644 panoptic segmentation.
@@ -70,54 +68,57 @@ class PackDetInputs(BaseTransform):
7068 Default: ``('img_id', 'img_path', 'ori_shape', 'img_shape',
7169 'scale_factor', 'flip', 'flip_direction')``
7270 """
73-
7471 mapping_table = {
75- "gt_bboxes" : "bboxes" ,
76- "gt_bboxes_labels" : "labels" ,
77- "gt_masks" : "masks" ,
72+ 'gt_bboxes' : 'bboxes' ,
73+ 'gt_bboxes_labels' : 'labels' ,
74+ 'gt_masks' : 'masks' ,
75+ 'gt_keypoints' : 'keypoints' ,
76+ 'gt_keypoints_visible' : 'keypoints_visible'
7877 }
7978
80- def __init__ (
81- self ,
82- meta_keys = (
83- "img_id" ,
84- "img_path" ,
85- "ori_shape" ,
86- "img_shape" ,
87- "scale_factor" ,
88- "flip" ,
89- "flip_direction" ,
90- ),
91- ):
79+ def __init__ (self ,
80+ meta_keys = ('img_id' , 'img_path' , 'ori_shape' , 'img_shape' ,
81+ 'scale_factor' , 'flip' , 'flip_direction' )):
9282 self .meta_keys = meta_keys
9383
84+
9485 def transform (self , results : dict ) -> dict :
9586 """Method to pack the input data.
96-
9787 Args:
9888 results (dict): Result dict from the data pipeline.
99-
10089 Returns:
10190 dict:
102-
10391 - 'inputs' (obj:`torch.Tensor`): The forward data of models.
10492 - 'data_sample' (obj:`DetDataSample`): The annotation info of the
10593 sample.
10694 """
10795 packed_results = dict ()
96+ if 'img' in results :
97+ img = results ['img' ]
98+ if len (img .shape ) < 3 :
99+ img = np .expand_dims (img , - 1 )
100+ # To improve the computational speed by 3-5 times, apply:
101+ # If image is not contiguous, use
102+ # `numpy.transpose()` followed by `numpy.ascontiguousarray()`
103+ # If image is already contiguous, use
104+ # `torch.permute()` followed by `torch.contiguous()`
105+ # Refer to https://github.com/open-mmlab/mmdetection/pull/9533
106+ # for more details
107+ if not img .flags .c_contiguous :
108+ img = np .ascontiguousarray (img .transpose (2 , 0 , 1 ))
109+ img = to_tensor (img )
110+ else :
111+ img = to_tensor (img ).permute (2 , 0 , 1 ).contiguous ()
108112
109- if not results .get ("torch" , False ):
110- results ["img" ] = V2F .to_dtype (
111- V2F .to_image (results ["img" ].copy ()), torch .uint8 , scale = True
112- )
113- results ["torch" ] = True
114-
115- if "img" in results :
116- packed_results ["inputs" ] = results ["img" ]
113+ packed_results ['inputs' ] = img
117114
118- if "gt_ignore_flags" in results :
119- valid_idx = np .where (results ["gt_ignore_flags" ] == 0 )[0 ]
120- ignore_idx = np .where (results ["gt_ignore_flags" ] == 1 )[0 ]
115+ if 'gt_ignore_flags' in results :
116+ valid_idx = np .where (results ['gt_ignore_flags' ] == 0 )[0 ]
117+ ignore_idx = np .where (results ['gt_ignore_flags' ] == 1 )[0 ]
118+ if 'gt_keypoints' in results :
119+ results ['gt_keypoints_visible' ] = results [
120+ 'gt_keypoints' ].keypoints_visible
121+ results ['gt_keypoints' ] = results ['gt_keypoints' ].keypoints
121122
122123 data_sample = DetDataSample ()
123124 instance_data = InstanceData ()
@@ -126,59 +127,60 @@ def transform(self, results: dict) -> dict:
126127 for key in self .mapping_table .keys ():
127128 if key not in results :
128129 continue
129- if key == " gt_masks" or isinstance (results [key ], BaseBoxes ):
130- if " gt_ignore_flags" in results :
131- instance_data [self . mapping_table [ key ]] = results [ key ][ valid_idx ]
132- ignore_instance_data [ self .mapping_table [key ]] = results [key ][
133- ignore_idx
134- ]
130+ if key == ' gt_masks' or isinstance (results [key ], BaseBoxes ):
131+ if ' gt_ignore_flags' in results :
132+ instance_data [
133+ self .mapping_table [key ]] = results [key ][valid_idx ]
134+ ignore_instance_data [
135+ self . mapping_table [ key ]] = results [ key ][ ignore_idx ]
135136 else :
136137 instance_data [self .mapping_table [key ]] = results [key ]
137138 else :
138- if " gt_ignore_flags" in results :
139+ if ' gt_ignore_flags' in results :
139140 instance_data [self .mapping_table [key ]] = to_tensor (
140- results [key ][valid_idx ]
141- )
141+ results [key ][valid_idx ])
142142 ignore_instance_data [self .mapping_table [key ]] = to_tensor (
143- results [key ][ignore_idx ]
144- )
143+ results [key ][ignore_idx ])
145144 else :
146- instance_data [self .mapping_table [key ]] = to_tensor (results [key ])
145+ instance_data [self .mapping_table [key ]] = to_tensor (
146+ results [key ])
147147 data_sample .gt_instances = instance_data
148148 data_sample .ignored_instances = ignore_instance_data
149149
150- if "proposals" in results :
151- proposals = InstanceData (
152- bboxes = to_tensor (results ["proposals" ]),
153- scores = to_tensor (results ["proposals_scores" ]),
154- )
155- data_sample .proposals = proposals
156-
157- if "gt_seg_map" in results :
150+ if 'gt_seg_map' in results :
158151 gt_sem_seg_data = dict (
159- sem_seg = to_tensor (results ["gt_seg_map" ][None , ...].copy ())
160- )
161- gt_sem_seg_data = PixelData (** gt_sem_seg_data )
162- if "ignore_index" in results :
163- metainfo = dict (ignore_index = results ["ignore_index" ])
164- gt_sem_seg_data .set_metainfo (metainfo )
165- data_sample .gt_sem_seg = gt_sem_seg_data
152+ sem_seg = to_tensor (results ['gt_seg_map' ][None , ...].copy ()))
153+ data_sample .gt_sem_seg = PixelData (** gt_sem_seg_data )
154+
155+ # In order to unify the support for the overlap mask annotations
156+ # i.e. mask overlap annotations in (h,w) format,
157+ # we use the gt_panoptic_seg field to unify the modeling
158+ if 'gt_panoptic_seg' in results :
159+ data_sample .gt_panoptic_seg = PixelData (
160+ pan_seg = results ['gt_panoptic_seg' ])
166161
167162 img_meta = {}
168163 for key in self .meta_keys :
169- if key in results :
170- img_meta [key ] = results [key ]
164+ assert key in results , f'`{ key } ` is not found in `results`, ' \
165+ f'the valid keys are { list (results )} .'
166+ img_meta [key ] = results [key ]
167+
171168 data_sample .set_metainfo (img_meta )
172- packed_results [" data_samples" ] = data_sample
169+ packed_results [' data_samples' ] = data_sample
173170
174171 return packed_results
175172
173+
176174 def __repr__ (self ) -> str :
177175 repr_str = self .__class__ .__name__
178- repr_str += f" (meta_keys={ self .meta_keys } )"
176+ repr_str += f' (meta_keys={ self .meta_keys } )'
179177 return repr_str
180178
181179
180+
181+
182+
183+
182184class PackInputs (BaseTransform ):
183185 """Pack the inputs data.
184186
0 commit comments