pytorch · NicolasHug · Jul 1, 2025 · Jun 30, 2025 · Jun 30, 2025 · Jul 1, 2025
diff --git a/test/common_utils.py b/test/common_utils.py
@@ -424,13 +424,6 @@ def sample_position(values, max_value):
         format = tv_tensors.BoundingBoxFormat[format]
 
     dtype = dtype or torch.float32
-    int_dtype = dtype in (
-        torch.uint8,
-        torch.int8,
-        torch.int16,
-        torch.int32,
-        torch.int64,
-    )
 
     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -457,14 +450,14 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1 = torch.round(x) if int_dtype else x
-        y1 = torch.round(y) if int_dtype else y
-        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
-        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
-        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
-        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
-        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
-        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        x1 = x
+        y1 = y
+        x2 = x1 + w * cos
+        y2 = y1 - w * sin
+        x3 = x2 + h * sin
+        y3 = y2 + h * cos
+        x4 = x1 + h * sin
+        y4 = y1 + h * cos
         parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")

diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -564,13 +564,6 @@ def reference_affine_rotated_bounding_boxes_helper(
 
     def affine_rotated_bounding_boxes(bounding_boxes):
         dtype = bounding_boxes.dtype
-        int_dtype = dtype in (
-            torch.uint8,
-            torch.int8,
-            torch.int16,
-            torch.int32,
-            torch.int64,
-        )
         device = bounding_boxes.device
 
         # Go to float before converting to prevent precision loss in case of CXCYWHR -> XYXYXYXY and W or H is 1
@@ -605,18 +598,12 @@ def affine_rotated_bounding_boxes(bounding_boxes):
         )
 
         output = output[[2, 3, 0, 1, 6, 7, 4, 5]] if flip else output
-        if not int_dtype:
-            output = _parallelogram_to_bounding_boxes(output)
+        output = _parallelogram_to_bounding_boxes(output)
 
         output = F.convert_bounding_box_format(
             output, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format
         )
 
-        if torch.is_floating_point(output) and int_dtype:
-            # It is important to round before cast.
-            output = torch.round(output)
-
-        # For rotated boxes, it is important to cast before clamping.
         return (
             F.clamp_bounding_boxes(
                 output.to(dtype=dtype, device=device),
@@ -760,6 +747,8 @@ def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype,
     def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device):
         if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
             return
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
 
         bounding_boxes = make_bounding_boxes(
             format=format,
@@ -1212,6 +1201,8 @@ def test_kernel_image(self, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
         check_kernel(
             F.horizontal_flip_bounding_boxes,
@@ -1441,6 +1432,8 @@ def test_kernel_image(self, param, value, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
         self._check_kernel(
             F.affine_bounding_boxes,
@@ -1655,7 +1648,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s
             center=center,
         )
 
-        torch.testing.assert_close(actual, expected, atol=1e-5, rtol=1e-5)
+        torch.testing.assert_close(actual, expected, atol=1e-4, rtol=1e-4)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
@@ -1823,6 +1816,8 @@ def test_kernel_image(self, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
         check_kernel(
             F.vertical_flip_bounding_boxes,
@@ -2021,8 +2016,14 @@ def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
         kwargs = {param: value}
         if param != "angle":
             kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"]
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
 
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
+        if tv_tensors.is_rotated_bounding_format(format):
+            # TODO: there is a 1e-6 difference between GPU and CPU outputs
+            # due to clamping. To keep this test from failing, we clamp the input beforehand.
+            bounding_boxes = F.clamp_bounding_boxes(bounding_boxes)
 
         check_kernel(
             F.rotate_bounding_boxes,
@@ -3236,6 +3237,8 @@ def test_kernel_image(self, param, value, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
 
         check_kernel(
@@ -3399,6 +3402,8 @@ def test_kernel_image(self, kwargs, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, kwargs, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
         check_kernel(F.crop_bounding_boxes, bounding_boxes, format=format, **kwargs)
 
@@ -3576,6 +3581,8 @@ def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, w
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
 
         actual = F.crop(bounding_boxes, **kwargs)
@@ -3590,6 +3597,8 @@ def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_bounding_boxes_correctness(self, output_size, format, dtype, device, seed):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         input_size = [s * 2 for s in output_size]
         bounding_boxes = make_bounding_boxes(input_size, format=format, dtype=dtype, device=device)
 
@@ -4267,6 +4276,10 @@ def _reference_convert_bounding_box_format(self, bounding_boxes, new_format):
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn_type", ["functional", "transform"])
     def test_correctness(self, old_format, new_format, dtype, device, fn_type):
+        if not dtype.is_floating_point and (
+            tv_tensors.is_rotated_bounding_format(old_format) or tv_tensors.is_rotated_bounding_format(new_format)
+        ):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=old_format, dtype=dtype, device=device)
 
         if fn_type == "functional":
@@ -4706,6 +4719,8 @@ def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
     def test_bounding_boxes_correctness(self, padding, format, dtype, device, fn):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
 
         actual = fn(bounding_boxes, padding=padding)
@@ -4876,6 +4891,8 @@ def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
     def test_bounding_boxes_correctness(self, output_size, format, dtype, device, fn):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
 
         actual = fn(bounding_boxes, output_size)
@@ -5242,6 +5259,8 @@ def perspective_bounding_boxes(bounding_boxes):
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
 
         actual = F.perspective(bounding_boxes, startpoints=startpoints, endpoints=endpoints)
@@ -5511,6 +5530,8 @@ class TestClampBoundingBoxes:
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel(self, format, clamping_mode, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, clamping_mode=clamping_mode, dtype=dtype, device=device)
         check_kernel(
             F.clamp_bounding_boxes,
@@ -5572,9 +5593,12 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode,
 
         if rotated:
             boxes = tv_tensors.BoundingBoxes(
-                [0, 0, 100, 100, 0], format="XYWHR", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
+                [0.0, 0.0, 100.0, 100.0, 0.0],
+                format="XYWHR",
+                canvas_size=(10, 10),
+                clamping_mode=constructor_clamping_mode,
             )
-            expected_clamped_output = torch.tensor([[0, 0, 10, 10, 0]])
+            expected_clamped_output = torch.tensor([[0.0, 0.0, 10.0, 10.0, 0.0]])
         else:
             boxes = tv_tensors.BoundingBoxes(
                 [0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
@@ -6938,14 +6962,11 @@ def test_classification_preset(image_type, label_type, dataset_return_type, to_t
 
 
 @pytest.mark.parametrize("input_size", [(17, 11), (11, 17), (11, 11)])
-@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
 @pytest.mark.parametrize("device", cpu_and_cuda())
-def test_parallelogram_to_bounding_boxes(input_size, dtype, device):
+def test_parallelogram_to_bounding_boxes(input_size, device):
     # Assert that applying `_parallelogram_to_bounding_boxes` to rotated boxes
     # does not modify the input.
-    bounding_boxes = make_bounding_boxes(
-        input_size, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, dtype=dtype, device=device
-    )
+    bounding_boxes = make_bounding_boxes(input_size, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, device=device)
     actual = _parallelogram_to_bounding_boxes(bounding_boxes)
     torch.testing.assert_close(actual, bounding_boxes, rtol=0, atol=1)
 

diff --git a/test/test_tv_tensors.py b/test/test_tv_tensors.py
@@ -69,15 +69,39 @@ def test_bbox_instance(data, format):
 )
 @pytest.mark.parametrize("scripted", (False, True))
 def test_bbox_format(format, is_rotated_expected, scripted):
-    if isinstance(format, str):
-        format = tv_tensors.BoundingBoxFormat[(format.upper())]
-
     fn = tv_tensors.is_rotated_bounding_format
     if scripted:
         fn = torch.jit.script(fn)
     assert fn(format) == is_rotated_expected
 
 
+@pytest.mark.parametrize(
+    "format, support_integer_dtype",
+    [  # every format is listed twice: as a string and as a BoundingBoxFormat member
+        ("XYXY", True),
+        ("XYWH", True),
+        ("CXCYWH", True),
+        ("XYXYXYXY", False),
+        ("XYWHR", False),
+        ("CXCYWHR", False),
+        (tv_tensors.BoundingBoxFormat.XYXY, True),
+        (tv_tensors.BoundingBoxFormat.XYWH, True),
+        (tv_tensors.BoundingBoxFormat.CXCYWH, True),
+        (tv_tensors.BoundingBoxFormat.XYXYXYXY, False),
+        (tv_tensors.BoundingBoxFormat.XYWHR, False),
+        (tv_tensors.BoundingBoxFormat.CXCYWHR, False),
+    ],
+)
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])  # uint8: the integer case
+def test_bbox_format_dtype(format, support_integer_dtype, input_dtype):
+    tensor = torch.randint(0, 32, size=(5, 2), dtype=input_dtype)  # arbitrary values; only construction is tested
+    if not input_dtype.is_floating_point and not support_integer_dtype:  # integer data + float-only format
+        with pytest.raises(ValueError, match="Rotated bounding boxes should be floating point tensors"):
+            tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))
+    else:  # every other combination must construct without raising
+        tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))
+
+
 def test_bbox_dim_error():
     data_3d = [[[1, 2, 3, 4]]]
     with pytest.raises(ValueError, match="Expected a 1D or 2D tensor, got 3D"):
@@ -409,5 +433,10 @@ def test_return_type_input():
 
 
 def test_box_clamping_mode_default():
-    assert tv_tensors.BoundingBoxes([0, 0, 10, 10], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
-    assert tv_tensors.BoundingBoxes([0, 0, 10, 10, 0], format="XYWHR", canvas_size=(100, 100)).clamping_mode == "soft"
+    assert (  # XYXY box built without passing clamping_mode
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
+    )
+    assert (  # XYWHR box (float data) built without passing clamping_mode
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0, 0.0], format="XYWHR", canvas_size=(100, 100)).clamping_mode
+        == "soft"
+    )