Improved convert_bounding_boxes_to_points to handle rotated bounding boxes and added tests for all formats

Alex-S-H-P · Alex-S-H-P · commit e99b82ae671a · 2025-05-05T21:59:34.000+02:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -6877,18 +6877,39 @@ def test_no_valid_input(self, query):
             query(["blah"])
 
     @pytest.mark.parametrize(
-        "boxes", [tv_tensors.BoundingBoxes(torch.tensor([[1, 1, 2, 2]]), format="XYXY", canvas_size=(4, 4))]
+        "boxes", [
+            tv_tensors.BoundingBoxes(torch.tensor([[1., 1., 2., 2.]]), format="XYXY", canvas_size=(4, 4)),  # [boxes0]
+            tv_tensors.BoundingBoxes(torch.tensor([[1., 1., 1., 1.]]), format="XYWH", canvas_size=(4, 4)),  # [boxes1]
+            tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1., 1.]]), format="CXCYWH", canvas_size=(4, 4)),  # [boxes2]
+            tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1., 1., 45]]), format="CXCYWHR", canvas_size=(4, 4)),  # [boxes3]
+            tv_tensors.BoundingBoxes(torch.tensor([[1., 1., 1., 1., 45.]]), format="XYWHR", canvas_size=(4, 4)),  # [boxes4]
+            tv_tensors.BoundingBoxes(torch.tensor([[1., 1., 1., 2., 2., 2., 2., 1.]]), format="XY" * 4, canvas_size=(4, 4)),  # [boxes5]
+        ]
     )
     def test_convert_bounding_boxes_to_points(self, boxes: tv_tensors.BoundingBoxes):
-        # TODO: this test can't handle rotated boxes yet
         kp = F.convert_bounding_boxes_to_points(boxes)
-        assert kp.shape == boxes.shape + (2,)
+        assert kp.shape == (boxes.shape[0], 4, 2)
         assert kp.dtype == boxes.dtype
         # kp is a list of A, B, C, D polygons.
-        # If we use A | C, we should get back the XYXY format of bounding box
-        reconverted = torch.cat([kp[..., 0, :], kp[..., 2, :]], dim=-1)
-        reconverted_bbox = F.convert_bounding_box_format(
-            tv_tensors.BoundingBoxes(reconverted, format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=kp.canvas_size),
-            new_format=boxes.format,
-        )
-        assert (reconverted_bbox == boxes).all(), f"Invalid reconversion : {reconverted_bbox}"
+
+        if F._meta.is_rotated_bounding_box_format(boxes.format):
+            # In the rotated case
+            # Flattening the four points gives the XYXYXYXY format, which must convert back to the original boxes.
+            reconverted = kp.reshape(-1, 8)
+            reconverted_bbox = F.convert_bounding_box_format(
+                tv_tensors.BoundingBoxes(reconverted, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, canvas_size=kp.canvas_size),
+                new_format=boxes.format
+            )
+            assert ((reconverted_bbox - boxes).abs() < 1e-5).all(), (  # Rotational computations mean that we can't ensure exactitude.
+                f"Invalid reconversion :\n\tGot:  {reconverted_bbox}\n\tFrom: {boxes}\n\t"
+                f"Diff: {reconverted_bbox - boxes}"
+            )
+        else:
+            # In the unrotated case
+            # If we use A | C, we should get back the XYXY format of bounding box
+            reconverted = torch.cat([kp[..., 0, :], kp[..., 2, :]], dim=-1)
+            reconverted_bbox = F.convert_bounding_box_format(
+                tv_tensors.BoundingBoxes(reconverted, format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=kp.canvas_size),
+                new_format=boxes.format,
+            )
+            assert (reconverted_bbox == boxes).all(), f"Invalid reconversion :\n\tGot:  {reconverted_bbox}\n\tFrom: {boxes}"
diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py
@@ -185,16 +185,38 @@ def _xyxy_to_keypoints(bounding_boxes: torch.Tensor) -> torch.Tensor:
     return bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]]
 
 
+def _xyxyxyxy_to_keypoints(bounding_boxes: torch.Tensor) -> torch.Tensor:
+    return bounding_boxes[:, [[0, 1], [2, 3], [4, 5], [6, 7]]]  # group columns pairwise: four (x, y) points per row
+
+
 def convert_bounding_boxes_to_points(bounding_boxes: tv_tensors.BoundingBoxes) -> tv_tensors.KeyPoints:
     """Converts a set of bounding boxes to its edge points.
 
+    .. note::
+
+        This handles rotated :class:`~torchvision.tv_tensors.BoundingBoxes` formats
+        by first converting them to XYXYXYXY format.
+
+        Due to floating-point approximation, this may not be an exact computation.
+
     Args:
         bounding_boxes (tv_tensors.BoundingBoxes): A set of ``N`` bounding boxes (of shape ``[N, 4]``)
 
     Returns:
-        tv_tensors.KeyPoints: The edges, of shape ``[N, 4, 2]``
+        tv_tensors.KeyPoints: The corner points, as a polygon of shape ``[N, 4, 2]``
     """
-    # TODO: support rotated BBOX
+    if is_rotated_bounding_box_format(bounding_boxes.format):
+        # We are working on a rotated bounding box
+        bbox = _convert_bounding_box_format(
+            bounding_boxes.as_subclass(torch.Tensor),
+            old_format=bounding_boxes.format,
+            new_format=BoundingBoxFormat.XYXYXYXY,
+            inplace=False,
+        )
+        return tv_tensors.KeyPoints(
+            _xyxyxyxy_to_keypoints(bbox), canvas_size=bounding_boxes.canvas_size
+        )
+
     bbox = _convert_bounding_box_format(
         bounding_boxes.as_subclass(torch.Tensor),
         old_format=bounding_boxes.format,