
Commit 782f406

Update resize_bounding_boxes for rotated boxes
Test Plan:

```bash
pytest test/test_transforms_v2.py -vvv -k "TestResize and test_kernel_bounding_boxes"
pytest test/test_transforms_v2.py -vvv -k "TestResize and test_bounding_boxes_correctness"
```
1 parent 87e821f commit 782f406
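
The commit threads a new `format` argument through the `F.resize_bounding_boxes` kernel so rotated boxes get corner-wise scaling. A minimal sketch of the updated call, assuming a torchvision build where rotated formats such as XYWHR are available; the box values, canvas size, and target size below are made up for illustration and are not taken from the commit:

```python
import torch
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F

# One rotated box in XYWHR layout: x, y, width, height, rotation angle.
boxes = torch.tensor([[10.0, 10.0, 40.0, 20.0, 30.0]])

# The kernel now receives the box format explicitly so it can branch on
# rotated vs. axis-aligned handling.
resized, new_canvas_size = F.resize_bounding_boxes(
    boxes,
    format=tv_tensors.BoundingBoxFormat.XYWHR,
    canvas_size=(100, 100),
    size=[50, 50],
)
print(resized.shape, new_canvas_size)  # torch.Size([1, 5]) (50, 50)
```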

File tree

- test/test_transforms_v2.py
- torchvision/transforms/v2/functional/_geometry.py

2 files changed: 100 additions, 20 deletions

test/test_transforms_v2.py

Lines changed: 25 additions & 13 deletions
```diff
@@ -49,7 +49,7 @@
 from torchvision.transforms.functional import pil_modes_mapping, to_pil_image
 from torchvision.transforms.v2 import functional as F
 from torchvision.transforms.v2._utils import check_type, is_pure_tensor
-from torchvision.transforms.v2.functional._geometry import _get_perspective_coeffs
+from torchvision.transforms.v2.functional._geometry import _get_perspective_coeffs, _parallelogram_to_bounding_boxes
 from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal


@@ -560,7 +560,9 @@ def affine_bounding_boxes(bounding_boxes):
     )


-def reference_affine_rotated_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True):
+def reference_affine_rotated_bounding_boxes_helper(
+    bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True, flip=False
+):
     format = bounding_boxes.format
     canvas_size = new_canvas_size or bounding_boxes.canvas_size
@@ -588,17 +590,20 @@ def affine_rotated_bounding_boxes(bounding_boxes):
         transformed_points = np.matmul(points, affine_matrix.astype(points.dtype).T)
         output = torch.tensor(
             [
-                float(transformed_points[1, 0]),
-                float(transformed_points[1, 1]),
                 float(transformed_points[0, 0]),
                 float(transformed_points[0, 1]),
-                float(transformed_points[3, 0]),
-                float(transformed_points[3, 1]),
+                float(transformed_points[1, 0]),
+                float(transformed_points[1, 1]),
                 float(transformed_points[2, 0]),
                 float(transformed_points[2, 1]),
+                float(transformed_points[3, 0]),
+                float(transformed_points[3, 1]),
             ]
         )

+        output = output[[2, 3, 0, 1, 6, 7, 4, 5]] if flip else output
+        output = _parallelogram_to_bounding_boxes(output)
+
         output = F.convert_bounding_box_format(
             output, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format
         )
@@ -707,7 +712,7 @@ def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype,
             check_scripted_vs_eager=not isinstance(size, int),
         )

-    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("size", OUTPUT_SIZES)
     @pytest.mark.parametrize("use_max_size", [True, False])
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@@ -725,6 +730,7 @@ def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device):
         check_kernel(
             F.resize_bounding_boxes,
             bounding_boxes,
+            format=format,
             canvas_size=bounding_boxes.canvas_size,
             size=size,
             **max_size_kwarg,
@@ -816,7 +822,7 @@ def test_image_correctness(self, size, interpolation, use_max_size, fn):
         self._check_output_size(image, actual, size=size, **max_size_kwarg)
         torch.testing.assert_close(actual, expected, atol=1, rtol=0)

-    def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=None):
+    def _reference_resize_bounding_boxes(self, bounding_boxes, format, *, size, max_size=None):
         old_height, old_width = bounding_boxes.canvas_size
         new_height, new_width = self._compute_output_size(
             input_size=bounding_boxes.canvas_size, size=size, max_size=max_size
@@ -832,13 +838,19 @@ def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=Non
             ],
         )

-        return reference_affine_bounding_boxes_helper(
+        helper = (
+            reference_affine_rotated_bounding_boxes_helper
+            if tv_tensors.is_rotated_bounding_format(bounding_boxes.format)
+            else reference_affine_bounding_boxes_helper
+        )
+
+        return helper(
             bounding_boxes,
             affine_matrix=affine_matrix,
             new_canvas_size=(new_height, new_width),
         )

-    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("size", OUTPUT_SIZES)
     @pytest.mark.parametrize("use_max_size", [True, False])
     @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
@@ -849,7 +861,7 @@ def test_bounding_boxes_correctness(self, format, size, use_max_size, fn):
         bounding_boxes = make_bounding_boxes(format=format, canvas_size=self.INPUT_SIZE)

         actual = fn(bounding_boxes, size=size, **max_size_kwarg)
-        expected = self._reference_resize_bounding_boxes(bounding_boxes, size=size, **max_size_kwarg)
+        expected = self._reference_resize_bounding_boxes(bounding_boxes, format=format, size=size, **max_size_kwarg)

         self._check_output_size(bounding_boxes, actual, size=size, **max_size_kwarg)
         torch.testing.assert_close(actual, expected)
@@ -1152,7 +1164,7 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.B
         )

         helper = (
-            reference_affine_rotated_bounding_boxes_helper
+            functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True)
             if tv_tensors.is_rotated_bounding_format(bounding_boxes.format)
             else reference_affine_bounding_boxes_helper
         )
@@ -1607,7 +1619,7 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.Bou
         )

         helper = (
-            reference_affine_rotated_bounding_boxes_helper
+            functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True)
             if tv_tensors.is_rotated_bounding_format(bounding_boxes.format)
             else reference_affine_bounding_boxes_helper
         )
```
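
In the flip reference helpers, the new `flip=True` path reorders the XYXYXYXY corners before the parallelogram correction is applied. A tiny sketch of that index permutation; the corner values are arbitrary examples, and only the `[2, 3, 0, 1, 6, 7, 4, 5]` ordering itself comes from the diff:

```python
import torch

# Corners of a box in XYXYXYXY order: (x1, y1), (x2, y2), (x3, y3), (x4, y4).
corners = torch.tensor([0.0, 0.0, 4.0, 0.0, 4.0, 2.0, 0.0, 2.0])

# flip=True swaps point 1 with point 2 and point 3 with point 4, presumably so
# the reference corner order matches what the flip kernels emit.
reordered = corners[[2, 3, 0, 1, 6, 7, 4, 5]]
print(reordered)  # tensor([4., 0., 0., 0., 0., 2., 4., 2.])
```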

torchvision/transforms/v2/functional/_geometry.py

Lines changed: 75 additions & 7 deletions
```diff
@@ -381,8 +381,53 @@ def _resize_mask_dispatch(
     return tv_tensors.wrap(output, like=inpt)


+def _parallelogram_to_bounding_boxes(parallelogram: torch.Tensor, inplace: bool = False) -> torch.Tensor:
+    """
+    Convert a parallelogram to a rectangle while keeping the points (x1, y1) and (x3, y3) unchanged.
+
+    This function transforms a parallelogram represented by 8 coordinates (4 points) into a rectangle.
+    The first point (x1, y1) and the third point (x3, y3) of the parallelogram remain fixed,
+    while the second and fourth points are adjusted to form a proper rectangle.
+
+    Args:
+        parallelogram (torch.Tensor): Tensor of shape (..., 8) containing coordinates of parallelograms.
+            Format is [x1, y1, x2, y2, x3, y3, x4, y4].
+        inplace (bool, optional): If True, performs operation in-place. Default is False.
+
+    Returns:
+        torch.Tensor: Tensor of same shape as input containing the rectangle coordinates.
+            The output maintains the same dtype as the input.
+    """
+    if not inplace:
+        parallelogram = parallelogram.clone()
+
+    dtype = parallelogram.dtype
+    if not torch.is_floating_point(parallelogram):
+        parallelogram = parallelogram.float()
+
+    # Calculate diagonal vector from first to third point
+    dx = parallelogram[..., 4] - parallelogram[..., 0]
+    dy = parallelogram[..., 5] - parallelogram[..., 1]
+    diag = torch.sqrt(dx**2 + dy**2)
+
+    # Calculate rotation angle in radians
+    r_rad = torch.atan2(parallelogram[..., 1] - parallelogram[..., 3], parallelogram[..., 2] - parallelogram[..., 0])
+    cos, sin = torch.cos(r_rad), torch.sin(r_rad)
+
+    # Calculate width using the angle between diagonal and rotation
+    w = diag * torch.abs(torch.sin(torch.atan2(dx, dy) - r_rad))
+
+    # Update coordinates to form a rectangle
+    parallelogram[..., 2] = parallelogram[..., 0] + w * cos
+    parallelogram[..., 3] = parallelogram[..., 1] - w * sin
+    parallelogram[..., 6] = parallelogram[..., 4] - w * cos
+    parallelogram[..., 7] = parallelogram[..., 5] + w * sin
+    return parallelogram.to(dtype)
+
+
 def resize_bounding_boxes(
     bounding_boxes: torch.Tensor,
+    format: tv_tensors.BoundingBoxFormat,
     canvas_size: tuple[int, int],
     size: Optional[list[int]],
     max_size: Optional[int] = None,
@@ -395,19 +440,42 @@ def resize_bounding_boxes(

     w_ratio = new_width / old_width
     h_ratio = new_height / old_height
-    ratios = torch.tensor([w_ratio, h_ratio, w_ratio, h_ratio], device=bounding_boxes.device)
-    return (
-        bounding_boxes.mul(ratios).to(bounding_boxes.dtype),
-        (new_height, new_width),
-    )
+    if tv_tensors.is_rotated_bounding_format(format):
+        original_shape = bounding_boxes.shape
+        xyxyxyxy_boxes = convert_bounding_box_format(
+            bounding_boxes, old_format=format, new_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, inplace=False
+        ).reshape(-1, 8)
+
+        ratios = torch.tensor(
+            [w_ratio, h_ratio, w_ratio, h_ratio, w_ratio, h_ratio, w_ratio, h_ratio], device=bounding_boxes.device
+        )
+        transformed_points = xyxyxyxy_boxes.mul(ratios)
+        out_bboxes = _parallelogram_to_bounding_boxes(transformed_points)
+        return (
+            convert_bounding_box_format(
+                out_bboxes,
+                old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY,
+                new_format=format,
+                inplace=False,
+            )
+            .to(bounding_boxes.dtype)
+            .reshape(original_shape),
+            (new_height, new_width),
+        )
+    else:
+        ratios = torch.tensor([w_ratio, h_ratio, w_ratio, h_ratio], device=bounding_boxes.device)
+        return (
+            bounding_boxes.mul(ratios).to(bounding_boxes.dtype),
+            (new_height, new_width),
+        )


 @_register_kernel_internal(resize, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False)
 def _resize_bounding_boxes_dispatch(
     inpt: tv_tensors.BoundingBoxes, size: Optional[list[int]], max_size: Optional[int] = None, **kwargs: Any
 ) -> tv_tensors.BoundingBoxes:
     output, canvas_size = resize_bounding_boxes(
-        inpt.as_subclass(torch.Tensor), inpt.canvas_size, size, max_size=max_size
+        inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, size=size, max_size=max_size
     )
     return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size)

@@ -2204,7 +2272,7 @@ def resized_crop_bounding_boxes(
     size: list[int],
 ) -> tuple[torch.Tensor, tuple[int, int]]:
     bounding_boxes, canvas_size = crop_bounding_boxes(bounding_boxes, format, top, left, height, width)
-    return resize_bounding_boxes(bounding_boxes, canvas_size=canvas_size, size=size)
+    return resize_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, size=size)


 @_register_kernel_internal(resized_crop, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False)
```
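
One easy property to check against the new helper: per its docstring, only the second and fourth corners are recomputed, so an input that is already an axis-aligned rectangle should come back unchanged. A quick sketch of that check; it is not part of the commit, `_parallelogram_to_bounding_boxes` is private so this import path may change, and the rectangle values are arbitrary:

```python
import torch
from torchvision.transforms.v2.functional._geometry import _parallelogram_to_bounding_boxes

# A 4x2 axis-aligned rectangle in XYXYXYXY order is already a rectangle, so the
# fixed first/third corners and the recomputed second/fourth corners coincide
# with the input (up to floating-point noise).
rect = torch.tensor([0.0, 0.0, 4.0, 0.0, 4.0, 2.0, 0.0, 2.0])
torch.testing.assert_close(_parallelogram_to_bounding_boxes(rect), rect)
```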

0 commit comments
