Address bbox to keypoint conversion

NicolasHug · NicolasHug · commit 0e1caea58fda · 2025-06-05T11:28:30.000+01:00
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
@@ -459,6 +459,7 @@ functionals
     v2.functional.to_pil_image
     v2.functional.to_dtype
     v2.functional.convert_bounding_box_format
+    v2.functional.convert_bounding_boxes_to_keypoints
 
 
 Deprecated
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -6008,7 +6008,9 @@ def test_transform(self, make_input, dtype, device):
                 "will degenerate to that anyway."
             )
 
-        torch.manual_seed(1)  # TODOKP why is this needed now??
+        # TODO needed to add seed after KeyPoints PR, not sure why? failure
+        # wasn't really significant anyway.
+        torch.manual_seed(1)
         check_transform(
             transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.25),
             make_input(dtype=dtype, device=device),
@@ -7194,36 +7196,35 @@ def test_no_valid_input(self, query):
         with pytest.raises(TypeError, match="No image"):
             query(["blah"])
 
-    # TODOKP this is tested here in TestUtils but defined in meta
-    @pytest.mark.parametrize(
-        "boxes",
-        [
-            tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 2.0, 2.0]]), format="XYXY", canvas_size=(4, 4)),
-            tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0]]), format="XYWH", canvas_size=(4, 4)),
-            tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0]]), format="CXCYWH", canvas_size=(4, 4)),
-            tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0, 45]]), format="CXCYWHR", canvas_size=(4, 4)),
-            tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0, 45.0]]), format="XYWHR", canvas_size=(4, 4)),
-            tv_tensors.BoundingBoxes(
-                torch.tensor([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0]]), format="XYXYXYXY", canvas_size=(4, 4)
-            ),
-        ],
-    )
-    def test_convert_bounding_boxes_to_points(self, boxes: tv_tensors.BoundingBoxes):
-        kp = F.convert_bounding_boxes_to_points(boxes)
-        assert kp.shape == (boxes.shape[0], 4, 2)
-        assert kp.dtype == boxes.dtype
-
-        # We manually convert the kp back into a BoundingBoxes, and convert that
-        # bbox back into the original `boxes` format to compare against it.
-        if F._meta.is_rotated_bounding_box_format(boxes.format):
-            reconverted = kp.reshape(-1, 8)
-            intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY
-        else:
-            reconverted = torch.cat([kp[..., 0, :], kp[..., 2, :]], dim=-1)
-            intermediate_format = tv_tensors.BoundingBoxFormat.XYXY
+@pytest.mark.parametrize(
+    "boxes",
+    [
+        tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 2.0, 2.0]]), format="XYXY", canvas_size=(4, 4)),
+        tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0]]), format="XYWH", canvas_size=(4, 4)),
+        tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0]]), format="CXCYWH", canvas_size=(4, 4)),
+        tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0, 45]]), format="CXCYWHR", canvas_size=(4, 4)),
+        tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0, 45.0]]), format="XYWHR", canvas_size=(4, 4)),
+        tv_tensors.BoundingBoxes(
+            torch.tensor([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0]]), format="XYXYXYXY", canvas_size=(4, 4)
+        ),
+    ],
+)
+def test_convert_bounding_boxes_to_keypoints(boxes: tv_tensors.BoundingBoxes):
+    kp = F.convert_bounding_boxes_to_keypoints(boxes)
+    assert kp.shape == (boxes.shape[0], 4, 2)
+    assert kp.dtype == boxes.dtype
+
+    # We manually convert the kp back into a BoundingBoxes, and convert that
+    # bbox back into the original `boxes` format to compare against it.
+    if F._meta.is_rotated_bounding_box_format(boxes.format):
+        reconverted = kp.reshape(-1, 8)
+        intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY
+    else:
+        reconverted = torch.cat([kp[..., 0, :], kp[..., 2, :]], dim=-1)
+        intermediate_format = tv_tensors.BoundingBoxFormat.XYXY
 
-        reconverted_bbox = F.convert_bounding_box_format(
-            tv_tensors.BoundingBoxes(reconverted, format=intermediate_format, canvas_size=kp.canvas_size),
-            new_format=boxes.format,
-        )
-        assert_equal(reconverted_bbox, boxes, atol=1e-5, rtol=0)
+    reconverted_bbox = F.convert_bounding_box_format(
+        tv_tensors.BoundingBoxes(reconverted, format=intermediate_format, canvas_size=kp.canvas_size),
+        new_format=boxes.format,
+    )
+    assert_equal(reconverted_bbox, boxes, atol=1e-5, rtol=0)
diff --git a/torchvision/transforms/v2/functional/__init__.py b/torchvision/transforms/v2/functional/__init__.py
@@ -6,7 +6,7 @@
     clamp_bounding_boxes,
     clamp_keypoints,
     convert_bounding_box_format,
-    convert_bounding_boxes_to_points,  #TODOKP also needs docs
+    convert_bounding_boxes_to_keypoints,
     get_dimensions_image,
     get_dimensions_video,
     get_dimensions,
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
@@ -70,7 +70,7 @@ def horizontal_flip_keypoints(keypoints: torch.Tensor, canvas_size: tuple[int, i
     shape = keypoints.shape
     keypoints = keypoints.clone().reshape(-1, 2)
     keypoints[..., 0] = keypoints[..., 0].sub_(canvas_size[1]).neg_()
-    return keypoints.reshape(shape)
+    return clamp_keypoints(keypoints.reshape(shape), canvas_size=canvas_size)
 
 
 @_register_kernel_internal(horizontal_flip, tv_tensors.KeyPoints, tv_tensor_wrapper=False)
@@ -164,7 +164,7 @@ def vertical_flip_keypoints(keypoints: torch.Tensor, canvas_size: tuple[int, int
     shape = keypoints.shape
     keypoints = keypoints.clone().reshape(-1, 2)
     keypoints[..., 1] = keypoints[..., 1].sub_(canvas_size[0]).neg_()
-    return keypoints.reshape(shape)
+    return clamp_keypoints(keypoints.reshape(shape), canvas_size=canvas_size)
 
 
 def vertical_flip_bounding_boxes(
diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py
@@ -189,40 +189,30 @@ def _xyxyxyxy_to_keypoints(bounding_boxes: torch.Tensor) -> torch.Tensor:
     return bounding_boxes[:, [[0, 1], [2, 3], [4, 5], [6, 7]]]
 
 
-# TODOKP Should this be in the box ops? Or in utils? rename points->keypoints.
-def convert_bounding_boxes_to_points(bounding_boxes: tv_tensors.BoundingBoxes) -> tv_tensors.KeyPoints:
+# Note: this doesn't have a corresponding transforms class.
+def convert_bounding_boxes_to_keypoints(bounding_boxes: tv_tensors.BoundingBoxes) -> tv_tensors.KeyPoints:
     """Convert a set of bounding boxes to its edge points.
 
-    .. note::
-
-        This handles rotated :class:`tv_tensors.BoundingBoxes` formats
-        by first converting them to XYXYXYXY format.
-
-        Due to floating-point approximation, this may not be an exact computation.
-
     Args:
         bounding_boxes (tv_tensors.BoundingBoxes): A set of ``N`` bounding boxes (of shape ``[N, 4]``)
 
     Returns:
         tv_tensors.KeyPoints: The edges, as a polygon of shape ``[N, 4, 2]``
     """
     if is_rotated_bounding_box_format(bounding_boxes.format):
-        # We are working on a rotated bounding box
-        bbox = _convert_bounding_box_format(
-            bounding_boxes.as_subclass(torch.Tensor),
-            old_format=bounding_boxes.format,
-            new_format=BoundingBoxFormat.XYXYXYXY,
-            inplace=False,
-        )
-        return tv_tensors.KeyPoints(_xyxyxyxy_to_keypoints(bbox), canvas_size=bounding_boxes.canvas_size)
+        intermediate_format = BoundingBoxFormat.XYXYXYXY
+        to_keypoints = _xyxyxyxy_to_keypoints
+    else:
+        intermediate_format = BoundingBoxFormat.XYXY
+        to_keypoints = _xyxy_to_keypoints
 
     bbox = _convert_bounding_box_format(
         bounding_boxes.as_subclass(torch.Tensor),
         old_format=bounding_boxes.format,
-        new_format=BoundingBoxFormat.XYXY,
+        new_format=intermediate_format,
         inplace=False,
     )
-    return tv_tensors.KeyPoints(_xyxy_to_keypoints(bbox), canvas_size=bounding_boxes.canvas_size)
+    return tv_tensors.KeyPoints(to_keypoints(bbox), canvas_size=bounding_boxes.canvas_size)
 
 
 def _cxcywhr_to_xywhr(cxcywhr: torch.Tensor, inplace: bool) -> torch.Tensor: