Fix tests for int rotated boxes

AntoineSimoulin · AntoineSimoulin · commit c96676a9f5d1 · 2025-06-04T12:43:37.000-07:00
diff --git a/test/common_utils.py b/test/common_utils.py
@@ -417,6 +417,13 @@ def sample_position(values, max_value):
         format = tv_tensors.BoundingBoxFormat[format]
 
     dtype = dtype or torch.float32
+    int_dtype = dtype in (
+        torch.uint8,
+        torch.int8,
+        torch.int16,
+        torch.int32,
+        torch.int64,
+    )
 
     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -443,17 +450,17 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1, y1 = x, y
-        x2 = x1 + w * cos
-        y2 = y1 - w * sin
-        x3 = x2 + h * sin
-        y3 = y2 + h * cos
-        x4 = x1 + h * sin
-        y4 = y1 + h * cos
+        x1 = torch.round(x) if int_dtype else x
+        y1 = torch.round(y) if int_dtype else y
+        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
+        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
+        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
+        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
+        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
+        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
         parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
-
     return tv_tensors.BoundingBoxes(
         torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
     )
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -5939,6 +5939,15 @@ def test_classification_preset(image_type, label_type, dataset_return_type, to_t
     assert out_label == label
 
 
+@pytest.mark.parametrize("input_size", [(17, 11), (11, 17), (11, 11)])
+@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_parallelogram_to_bounding_boxes(input_size, dtype, device):
+    bounding_boxes = make_bounding_boxes(input_size, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, dtype=dtype, device=device)
+    actual = _parallelogram_to_bounding_boxes(bounding_boxes)
+    torch.testing.assert_close(actual, bounding_boxes, rtol=0, atol=1)
+
+
 @pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, tv_tensors.Image))
 @pytest.mark.parametrize("data_augmentation", ("hflip", "lsj", "multiscale", "ssd", "ssdlite"))
 @pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImage))
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
@@ -381,29 +381,32 @@ def _resize_mask_dispatch(
     return tv_tensors.wrap(output, like=inpt)
 
 
-def _parallelogram_to_bounding_boxes(parallelogram: torch.Tensor, inplace: bool = False) -> torch.Tensor:
+def _parallelogram_to_bounding_boxes(parallelogram: torch.Tensor) -> torch.Tensor:
     """
     Convert a parallelogram to a rectangle while keeping the points (x1, y1) and (x3, y3) unchanged.
 
     This function transforms a parallelogram represented by 8 coordinates (4 points) into a rectangle.
     The first point (x1, y1) and the third point (x3, y3) of the parallelogram remain fixed,
     while the second and fourth points are adjusted to form a proper rectangle.
 
+    Note:
+        This function writes into ``parallelogram`` and returns it; pass a clone to preserve the input.
+
     Args:
         parallelogram (torch.Tensor): Tensor of shape (..., 8) containing coordinates of parallelograms.
                                      Format is [x1, y1, x2, y2, x3, y3, x4, y4].
-        inplace (bool, optional): If True, performs operation in-place. Default is False.
 
     Returns:
         torch.Tensor: Tensor of same shape as input containing the rectangle coordinates.
                      The output maintains the same dtype as the input.
     """
-    if not inplace:
-        parallelogram = parallelogram.clone()
-
     dtype = parallelogram.dtype
-    if not torch.is_floating_point(parallelogram):
-        parallelogram = parallelogram.float()
+    int_dtype = dtype in (torch.uint8,
+            torch.int8,
+            torch.int16,
+            torch.int32,
+            torch.int64,
+        )
 
     # Calculate diagonal vector from first to third point
     dx = parallelogram[..., 4] - parallelogram[..., 0]
@@ -417,21 +420,28 @@ def _parallelogram_to_bounding_boxes(parallelogram: torch.Tensor, inplace: bool
     # Calculate width using the angle between diagonal and rotation
     w = diag * torch.abs(torch.sin(torch.atan2(dx, dy) - r_rad))
 
+    delta_x = torch.round(w * cos).to(dtype) if int_dtype else w * cos
+    detla_y = torch.round(w * sin).to(dtype) if int_dtype else w * sin
+
     # Update coordinates to form a rectangle
-    parallelogram[..., 2] = parallelogram[..., 0] + w * cos
-    parallelogram[..., 3] = parallelogram[..., 1] - w * sin
-    parallelogram[..., 6] = parallelogram[..., 4] - w * cos
-    parallelogram[..., 7] = parallelogram[..., 5] + w * sin
-    return parallelogram.to(dtype)
+    parallelogram[..., 2] = parallelogram[..., 0] + delta_x
+    parallelogram[..., 3] = parallelogram[..., 1] - detla_y
+    parallelogram[..., 6] = parallelogram[..., 4] - delta_x
+    parallelogram[..., 7] = parallelogram[..., 5] + detla_y
+    return parallelogram
 
 
 def resize_bounding_boxes(
     bounding_boxes: torch.Tensor,
-    format: tv_tensors.BoundingBoxFormat,
     canvas_size: tuple[int, int],
     size: Optional[list[int]],
     max_size: Optional[int] = None,
+    format: tv_tensors.BoundingBoxFormat = tv_tensors.BoundingBoxFormat.XYXY,
 ) -> tuple[torch.Tensor, tuple[int, int]]:
+    # We set the default format to `tv_tensors.BoundingBoxFormat.XYXY`
+    # to ensure backward compatibility.
+    # Before the introduction of rotated bounding box formats,
+    # this function did not receive a `format` parameter as input.
     old_height, old_width = canvas_size
     new_height, new_width = _compute_resized_output_size(canvas_size, size=size, max_size=max_size)
 
@@ -893,12 +903,9 @@ def _affine_bounding_boxes_with_expand(
     bounding_boxes = bounding_boxes.clone() if bounding_boxes.is_floating_point() else bounding_boxes.float()
     dtype = bounding_boxes.dtype
     device = bounding_boxes.device
-    intermediate_format = (
-        tv_tensors.BoundingBoxFormat.XYXYXYXY
-        if tv_tensors.is_rotated_bounding_format(format)
-        else tv_tensors.BoundingBoxFormat.XYXY
-    )
-    intermediate_shape = 8 if tv_tensors.is_rotated_bounding_format(format) else 4
+    is_rotated = tv_tensors.is_rotated_bounding_format(format)
+    intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY if is_rotated else tv_tensors.BoundingBoxFormat.XYXY
+    intermediate_shape = 8 if is_rotated else 4
     bounding_boxes = (
         convert_bounding_box_format(bounding_boxes, old_format=format, new_format=intermediate_format, inplace=True)
     ).reshape(-1, intermediate_shape)
@@ -925,7 +932,7 @@ def _affine_bounding_boxes_with_expand(
     # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
     # Single point structure is similar to
     # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
-    if tv_tensors.is_rotated_bounding_format(format):
+    if is_rotated:
         points = bounding_boxes.reshape(-1, 2)
     else:
         points = bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2)
@@ -934,7 +941,7 @@ def _affine_bounding_boxes_with_expand(
     transformed_points = torch.matmul(points, transposed_affine_matrix)
     # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
     # and compute bounding box from 4 transformed points:
-    if tv_tensors.is_rotated_bounding_format(format):
+    if is_rotated:
         transformed_points = transformed_points.reshape(-1, 8)
         out_bboxes = _parallelogram_to_bounding_boxes(transformed_points)
     else:
@@ -1557,6 +1564,9 @@ def crop_bounding_boxes(
     bounding_boxes = bounding_boxes - torch.tensor(sub, dtype=bounding_boxes.dtype, device=bounding_boxes.device)
     canvas_size = (height, width)
 
+    if format == tv_tensors.BoundingBoxFormat.XYXYXYXY:
+        bounding_boxes = _parallelogram_to_bounding_boxes(bounding_boxes)
+
     return clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size), canvas_size