pytorch · NicolasHug · Jul 1, 2025 · Jun 20, 2025 · Jun 20, 2025 · Jun 22, 2025
diff --git a/test/common_utils.py b/test/common_utils.py
@@ -21,7 +21,7 @@
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
 from torchvision import io, tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from torchvision.transforms.v2.functional import clamp_bounding_boxes, to_image, to_pil_image
+from torchvision.transforms.v2.functional import to_image, to_pil_image
 
 
 IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
@@ -410,7 +410,7 @@ def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
     format=tv_tensors.BoundingBoxFormat.XYXY,
-    clamping_mode="hard",  # TODOBB
+    clamping_mode="soft",
     num_boxes=1,
     dtype=None,
     device="cpu",
@@ -469,21 +469,6 @@ def sample_position(values, max_value):
     else:
         raise ValueError(f"Format {format} is not supported")
     out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device)
-    if tv_tensors.is_rotated_bounding_format(format):
-        # The rotated bounding boxes are not guaranteed to be within the canvas by design,
-        # so we apply clamping. We also add a 2 buffer to the canvas size to avoid
-        # numerical issues during the testing
-        buffer = 4
-        out_boxes = clamp_bounding_boxes(
-            out_boxes,
-            format=format,
-            canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer),
-            clamping_mode=clamping_mode,
-        )
-        if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR:
-            out_boxes[:, :2] += buffer // 2
-        elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
-            out_boxes[:, :] += buffer // 2
     return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode)
 
 

diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -551,6 +551,7 @@ def affine_bounding_boxes(bounding_boxes):
         ),
         format=format,
         canvas_size=canvas_size,
+        clamping_mode=clamping_mode,
     )
 
 
@@ -639,6 +640,7 @@ def affine_rotated_bounding_boxes(bounding_boxes):
         ).reshape(bounding_boxes.shape),
         format=format,
         canvas_size=canvas_size,
+        clamping_mode=clamping_mode,
     )
 
 
@@ -1305,7 +1307,7 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.B
             if tv_tensors.is_rotated_bounding_format(bounding_boxes.format)
             else reference_affine_bounding_boxes_helper
         )
-        return helper(bounding_boxes, affine_matrix=affine_matrix)
+        return helper(bounding_boxes, affine_matrix=affine_matrix, clamp=False)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize(
@@ -1914,7 +1916,7 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.Bou
             if tv_tensors.is_rotated_bounding_format(bounding_boxes.format)
             else reference_affine_bounding_boxes_helper
         )
-        return helper(bounding_boxes, affine_matrix=affine_matrix)
+        return helper(bounding_boxes, affine_matrix=affine_matrix, clamp=False)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
@@ -2079,7 +2081,6 @@ def test_functional(self, make_input):
             (F.rotate_image, torch.Tensor),
             (F._geometry._rotate_image_pil, PIL.Image.Image),
             (F.rotate_image, tv_tensors.Image),
-            (F.rotate_bounding_boxes, tv_tensors.BoundingBoxes),
             (F.rotate_mask, tv_tensors.Mask),
             (F.rotate_video, tv_tensors.Video),
             (F.rotate_keypoints, tv_tensors.KeyPoints),
@@ -2229,29 +2230,26 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen
             clamp=False,
         )
 
-        return F.clamp_bounding_boxes(self._recenter_bounding_boxes_after_expand(output, recenter_xy=recenter_xy)).to(
-            bounding_boxes
-        )
+        return self._recenter_bounding_boxes_after_expand(output, recenter_xy=recenter_xy).to(bounding_boxes)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     def test_functional_bounding_boxes_correctness(self, format, angle, expand, center):
-        bounding_boxes = make_bounding_boxes(format=format)
+        bounding_boxes = make_bounding_boxes(format=format, clamping_mode=None)
 
         actual = F.rotate(bounding_boxes, angle=angle, expand=expand, center=center)
         expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center)
-
-        torch.testing.assert_close(actual, expected)
         torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0)
+        torch.testing.assert_close(actual, expected)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_bounding_boxes_correctness(self, format, expand, center, seed):
-        bounding_boxes = make_bounding_boxes(format=format)
+        bounding_boxes = make_bounding_boxes(format=format, clamping_mode=None)
 
         transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center)
 
@@ -2262,9 +2260,8 @@ def test_transform_bounding_boxes_correctness(self, format, expand, center, seed
         actual = transform(bounding_boxes)
 
         expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center)
-
-        torch.testing.assert_close(actual, expected)
         torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0)
+        torch.testing.assert_close(actual, expected)
 
     def _recenter_keypoints_after_expand(self, keypoints, *, recenter_xy):
         x, y = recenter_xy
@@ -4349,7 +4346,6 @@ def test_functional(self, make_input):
             (F.resized_crop_image, torch.Tensor),
             (F._geometry._resized_crop_image_pil, PIL.Image.Image),
             (F.resized_crop_image, tv_tensors.Image),
-            (F.resized_crop_bounding_boxes, tv_tensors.BoundingBoxes),
             (F.resized_crop_mask, tv_tensors.Mask),
             (F.resized_crop_video, tv_tensors.Video),
             (F.resized_crop_keypoints, tv_tensors.KeyPoints),
@@ -4415,6 +4411,7 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h
                 [0, 0, 1],
             ],
         )
+
         affine_matrix = (resize_affine_matrix @ crop_affine_matrix)[:2, :]
 
         helper = (
@@ -4423,15 +4420,15 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h
             else reference_affine_bounding_boxes_helper
         )
 
-        return helper(
-            bounding_boxes,
-            affine_matrix=affine_matrix,
-            new_canvas_size=size,
-        )
+        return helper(bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=size, clamp=False)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     def test_functional_bounding_boxes_correctness(self, format):
-        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
+        # Note that we don't want to clamp because in
+        # _reference_resized_crop_bounding_boxes we are fusing the crop and the
+        # resize operations, so none of the clampings happen - in particular,
+        # the intermediate one between the crop and the resize.
+        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, clamping_mode=None)
 
         actual = F.resized_crop(bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE)
         expected = self._reference_resized_crop_bounding_boxes(
@@ -5510,7 +5507,7 @@ def test_correctness_image(self, mean, std, dtype, fn):
 
 class TestClampBoundingBoxes:
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
-    @pytest.mark.parametrize("clamping_mode", ("hard", "none"))  # TODOBB add soft
+    @pytest.mark.parametrize("clamping_mode", ("hard", None))  # TODOBB add soft
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel(self, format, clamping_mode, dtype, device):
@@ -5524,7 +5521,7 @@ def test_kernel(self, format, clamping_mode, dtype, device):
         )
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
-    @pytest.mark.parametrize("clamping_mode", ("hard", "none"))  # TODOBB add soft
+    @pytest.mark.parametrize("clamping_mode", ("hard", None))  # TODOBB add soft
     def test_functional(self, format, clamping_mode):
         check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format, clamping_mode=clamping_mode))
 
@@ -5552,8 +5549,8 @@ def test_transform(self):
         check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes())
 
     @pytest.mark.parametrize("rotated", (True, False))
-    @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none"))
-    @pytest.mark.parametrize("clamping_mode", ("hard", "none", None))  # TODOBB add soft here.
+    @pytest.mark.parametrize("constructor_clamping_mode", ("hard", None))
+    @pytest.mark.parametrize("clamping_mode", ("hard", None, None))  # TODOBB add soft; duplicate None, maybe "auto"?
     @pytest.mark.parametrize("pass_pure_tensor", (True, False))
     @pytest.mark.parametrize("fn", [F.clamp_bounding_boxes, transform_cls_to_functional(transforms.ClampBoundingBoxes)])
     def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode, pass_pure_tensor, fn):
@@ -5562,7 +5559,7 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode,
         #   functional (or to the class) relies on the box's `.clamping_mode`
         #   attribute
         # - That clamping happens when it should, and only when it should, i.e.
-        #   when the clamping mode is not "none". It doesn't validate the
+        #   when the clamping mode is not None. It doesn't validate the
         #   nunmerical results, only that clamping happened. For that, we create
         #   a large 100x100 box inside of a small 10x10 image.
 
@@ -5595,16 +5592,16 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode,
             out = fn(boxes, clamping_mode=clamping_mode)
 
         clamping_mode_prevailing = constructor_clamping_mode if clamping_mode is None else clamping_mode
-        if clamping_mode_prevailing == "none":
+        if clamping_mode_prevailing is None:
             assert_equal(boxes, out)  # should be a pass-through
         else:
             assert_equal(out, expected_clamped_output)
 
 
 class TestSetClampingMode:
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
-    @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none"))  # TODOBB add soft
-    @pytest.mark.parametrize("desired_clamping_mode", ("hard", "none"))  # TODOBB add soft
+    @pytest.mark.parametrize("constructor_clamping_mode", ("hard", None))  # TODOBB add soft
+    @pytest.mark.parametrize("desired_clamping_mode", ("hard", None))  # TODOBB add soft
     def test_setter(self, format, constructor_clamping_mode, desired_clamping_mode):
 
         in_boxes = make_bounding_boxes(format=format, clamping_mode=constructor_clamping_mode)
@@ -5614,7 +5611,7 @@ def test_setter(self, format, constructor_clamping_mode, desired_clamping_mode):
         assert out_boxes.clamping_mode == desired_clamping_mode
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
-    @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none"))  # TODOBB add soft
+    @pytest.mark.parametrize("constructor_clamping_mode", ("hard", None))  # TODOBB add soft
     def test_pipeline_no_leak(self, format, constructor_clamping_mode):
         class AssertClampingMode(transforms.Transform):
             def __init__(self, expected_clamping_mode):
@@ -5629,12 +5626,12 @@ def transform(self, inpt, _):
 
         t = transforms.Compose(
             [
-                transforms.SetClampingMode("none"),
-                AssertClampingMode("none"),
+                transforms.SetClampingMode(None),
+                AssertClampingMode(None),
                 transforms.SetClampingMode("hard"),
                 AssertClampingMode("hard"),
-                transforms.SetClampingMode("none"),
-                AssertClampingMode("none"),
+                transforms.SetClampingMode(None),
+                AssertClampingMode(None),
                 transforms.ClampBoundingBoxes("hard"),
             ]
         )
@@ -5646,7 +5643,7 @@ def transform(self, inpt, _):
 
         # assert that the output boxes clamping_mode is the one set by the last SetClampingMode.
         # ClampBoundingBoxes doesn't set clamping_mode.
-        assert out_boxes.clamping_mode == "none"
+        assert out_boxes.clamping_mode is None
 
 
 class TestClampKeyPoints:

diff --git a/test/test_tv_tensors.py b/test/test_tv_tensors.py
@@ -406,3 +406,8 @@ def test_return_type_input():
         tv_tensors.set_return_type("typo")
 
     tv_tensors.set_return_type("tensor")
+
+
+def test_box_clamping_mode_default():  # no clamping_mode passed -> expect the "soft" default
+    assert tv_tensors.BoundingBoxes([0, 0, 10, 10], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
+    assert tv_tensors.BoundingBoxes([0, 0, 10, 10, 0], format="XYWHR", canvas_size=(100, 100)).clamping_mode == "soft"
diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional, Union
+from typing import Any, Union
 
 from torchvision import tv_tensors
 from torchvision.transforms.v2 import functional as F, Transform
@@ -34,7 +34,9 @@ class ClampBoundingBoxes(Transform):
 
     """
 
-    def __init__(self, clamping_mode: Optional[CLAMPING_MODE_TYPE] = None) -> None:
+    # TODOBB consider making "auto" a Literal, make sure torchscript is still happy
+    # TODOBB validate clamping_mode
+    def __init__(self, clamping_mode: Union[CLAMPING_MODE_TYPE, str] = "auto") -> None:
         super().__init__()
         self.clamping_mode = clamping_mode