Update clamp_bounding_boxes for rotated boxes

AntoineSimoulin · AntoineSimoulin · commit 1105aa10fc49 · 2025-06-10T14:48:56.000-07:00
Test Plan:
Unit tests:
```bash
pytest test/test_transforms_v2.py -vvv -k "TestClampBoundingBoxes and test_kernel"
pytest test/test_transforms_v2.py -vvv -k "TestClampBoundingBoxes and test_functional"
```
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -4770,7 +4770,7 @@ def test_correctness_image(self, mean, std, dtype, fn):
 
 
 class TestClampBoundingBoxes:
-    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel(self, format, dtype, device):
@@ -4782,7 +4782,7 @@ def test_kernel(self, format, dtype, device):
             canvas_size=bounding_boxes.canvas_size,
         )
 
-    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     def test_functional(self, format):
         check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format))
 
diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py
@@ -352,14 +352,122 @@ def _clamp_bounding_boxes(
     return out_boxes.to(in_dtype)
 
 
+def _order_bounding_boxes_points(
+    bounding_boxes: torch.Tensor, indices: torch.Tensor | None = None
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Cyclically re-order the four points of each box, keeping their relative order.
+
+    With ``indices=None`` the start point minimises ``(y_max - y) / y_max + 100 * (x + 1)``: the x
+    term dominates, so this is normally the lowest-x point, and for equal x and positive ``y_max``
+    the point with the highest y wins. NOTE(review): the score divides by ``y_max`` and is inf/NaN
+    when ``y_max`` is 0 - confirm callers never hit this. Otherwise ``indices`` is applied as-is.
+
+    Args:
+        bounding_boxes (torch.Tensor): 2-D tensor whose rows are [x1, y1, x2, y2, x3, y3, x4, y4].
+        indices (torch.Tensor | None): Gather indices along dim 1 (cast to int64). If None, they are computed.
+
+    Returns:
+        tuple[torch.Tensor, torch.Tensor]: A tuple containing:
+            - indices: The indices used for reordering
+            - reordered_boxes: The bounding boxes with reordered points
+    """
+    if indices is None:
+        output_xyxyxyxy = bounding_boxes.clone().reshape(-1, 8)
+        x, y = output_xyxyxyxy[..., 0::2], output_xyxyxyxy[..., 1::2]  # x and y of the 4 points
+        y_max = torch.max(y, dim=1, keepdim=True)[0]  # largest y of each box
+        _, x1 = (y_max - y).div(y_max).add(x.add(1).mul(100)).min(dim=1)  # x1: index (0-3) of the start point
+        indices = torch.ones_like(output_xyxyxyxy)
+        indices[..., 0] = x1.mul(2)  # column of the start point's x coordinate
+        indices.cumsum_(1).remainder_(8)  # column j becomes (2 * x1 + j) % 8, i.e. a cyclic shift
+    return indices, bounding_boxes.gather(1, indices.to(torch.int64))
+
+
+def area(box: torch.Tensor) -> torch.Tensor:  # per-box product of SQUARED edge lengths, not the area itself
+    x1, y1, x2, y2, x3, y3, x4, y4 = box.clone().reshape(-1, 8).unbind(-1)
+    w = (y2 - y1) ** 2 + (x2 - x1) ** 2  # squared length of edge 1-2 (no square root is taken)
+    h = (y3 - y2) ** 2 + (x3 - x2) ** 2  # squared length of edge 2-3
+    return w * h  # equals area ** 2 when the two edges are perpendicular
+
+
+def _clamp_along_y_axis(
+    bounding_boxes: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Adjust rotated boxes whose first point lies left of the y-axis (x1 < 0).
+
+    Four candidate boxes are built (case_a: move point 1 along edge 1-2 to x = 0; case_b: move it
+    along edge 4-1 to x = 0; case_c: keep point 3 and recompute the other three points; case_d: all
+    zeros) and one is selected per box from the signs of x1..x4. Boxes with x1 >= 0 are unchanged.
+
+    Args:
+        bounding_boxes (torch.Tensor): Boxes as rows of [x1, y1, x2, y2, x3, y3, x4, y4].
+
+    Returns:
+        torch.Tensor: The adjusted bounding boxes, with the input's dtype and shape.
+    """
+    original_dtype = bounding_boxes.dtype
+    original_shape = bounding_boxes.shape
+    x1, y1, x2, y2, x3, y3, x4, y4 = bounding_boxes.reshape(-1, 8).unbind(-1)
+    a = (y2 - y1) / (x2 - x1)  # slope of edge 1-2; NOTE(review): inf/NaN when x2 == x1 - confirm handling
+    b1 = y1 - a * x1  # y-intercept of the line through point 1 with slope a
+    b2 = y2 + x2 / a  # y-intercept of the line through point 2 with slope -1/a; NOTE(review): divides by a
+    b3 = y3 - a * x3  # y-intercept of the line through point 3 with slope a
+    b4 = y4 + x4 / a  # y-intercept of the line through point 4 with slope -1/a; NOTE(review): divides by a
+    b23 = (b2 - b3) / 2 * a / (1 + a**2)  # half the x at which the point-2 and point-3 lines intersect
+    z = torch.zeros_like(b1)
+    case_a = torch.cat([x.unsqueeze(1) for x in [z, b1, x2, y2, x3, y3, x3 - x2, y3 + b1 - y2]], dim=1)
+    case_b = torch.cat([x.unsqueeze(1) for x in [z, b4, x2 - x1, y2 - y1 + b4, x3, y3, x4, y4]], dim=1)
+    case_c = torch.cat(
+        [x.unsqueeze(1) for x in [z, (b2 + b3) / 2, b23, -b23 / a + b2, x3, y3, b23, b23 * a + b3]], dim=1
+    )
+    case_d = torch.zeros_like(case_c)  # every coordinate set to 0
+
+    cond_a = x1.lt(0).logical_and(x2.ge(0)).logical_and(x3.ge(0)).logical_and(x4.ge(0))  # only point 1 has x < 0
+    cond_a = cond_a.logical_and(area(case_a) > area(case_b))  # ... and case_a scores higher than case_b
+    cond_a = cond_a.logical_or(x1.lt(0).logical_and(x2.ge(0)).logical_and(x3.ge(0)).logical_and(x4.le(0)))
+    cond_b = x1.lt(0).logical_and(x2.ge(0)).logical_and(x3.ge(0)).logical_and(x4.ge(0))  # only point 1 has x < 0
+    cond_b = cond_b.logical_and(area(case_a) <= area(case_b))  # ... and case_b scores at least as high
+    cond_b = cond_b.logical_or(x1.lt(0).logical_and(x2.le(0)).logical_and(x3.ge(0)).logical_and(x4.ge(0)))
+    cond_c = x1.lt(0).logical_and(x2.le(0)).logical_and(x3.ge(0)).logical_and(x4.le(0))  # only x3 is >= 0
+    cond_d = x1.lt(0).logical_and(x2.le(0)).logical_and(x3.le(0)).logical_and(x4.le(0))  # no point has x > 0
+
+    for cond, case in zip(  # applied in order, so a later matching condition overrides an earlier one
+        [cond_a, cond_b, cond_c, cond_d],
+        [case_a, case_b, case_c, case_d],
+    ):
+        bounding_boxes = torch.where(cond.unsqueeze(1).repeat(1, 8), case.reshape(-1, 8), bounding_boxes)
+    return bounding_boxes.to(original_dtype).reshape(original_shape)
+
+
 def _clamp_rotated_bounding_boxes(
     bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int]
 ) -> torch.Tensor:
-    # TODO: For now we are not clamping rotated bounding boxes.
-    in_dtype = bounding_boxes.dtype
-    out_boxes = bounding_boxes.clone() if bounding_boxes.is_floating_point() else bounding_boxes.float()
+    original_shape = bounding_boxes.shape
+    original_dtype = bounding_boxes.dtype
+    bounding_boxes = bounding_boxes.clone() if bounding_boxes.is_floating_point() else bounding_boxes.float()
+    out_boxes = (  # float working copy converted to 8 corner coordinates per row
+        convert_bounding_box_format(
+            bounding_boxes, old_format=format, new_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, inplace=True
+        )
+    ).reshape(-1, 8)
+
+    for _ in range(4):  # each pass handles points with x < 0, then turns the frame by a quarter turn
+        indices, out_boxes = _order_bounding_boxes_points(out_boxes)
+        out_boxes = _clamp_along_y_axis(out_boxes)
+        _, out_boxes = _order_bounding_boxes_points(out_boxes, indices)  # NOTE(review): re-applies the forward shift, not its inverse; order is restored only for start index 0 or 2 - confirm
+        # rotate 90 degrees counterclockwise: (x, y) -> (y, canvas_size[1] - x); four turns restore the frame
+        out_boxes[:, ::2], out_boxes[:, 1::2] = (
+            out_boxes[:, 1::2].clone(),
+            canvas_size[1] - out_boxes[:, ::2].clone(),
+        )
+        canvas_size = (canvas_size[1], canvas_size[0])  # swap the two canvas extents to match the turned frame
 
-    return out_boxes.to(in_dtype)
+    out_boxes = convert_bounding_box_format(  # back to the caller's format and original shape
+        out_boxes, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format, inplace=True
+    ).reshape(original_shape)
+
+    out_boxes = out_boxes.to(original_dtype)  # cast back, e.g. to the integer dtype of the input
+    return out_boxes
 
 
 def clamp_bounding_boxes(