diff --git a/test/common_utils.py b/test/common_utils.py
index ee3a2d5cbde..0da8e6bbc1d 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -424,13 +424,6 @@ def sample_position(values, max_value):
         format = tv_tensors.BoundingBoxFormat[format]
 
     dtype = dtype or torch.float32
-    int_dtype = dtype in (
-        torch.uint8,
-        torch.int8,
-        torch.int16,
-        torch.int32,
-        torch.int64,
-    )
 
     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -457,14 +450,14 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1 = torch.round(x) if int_dtype else x
-        y1 = torch.round(y) if int_dtype else y
-        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
-        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
-        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
-        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
-        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
-        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        x1 = x
+        y1 = y
+        x2 = x1 + w * cos
+        y2 = y1 - w * sin
+        x3 = x2 + h * sin
+        y3 = y2 + h * cos
+        x4 = x1 + h * sin
+        y4 = y1 + h * cos
         parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index b05b04cca89..1030032b980 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -564,13 +564,6 @@ def reference_affine_rotated_bounding_boxes_helper(
 
     def affine_rotated_bounding_boxes(bounding_boxes):
         dtype = bounding_boxes.dtype
-        int_dtype = dtype in (
-            torch.uint8,
-            torch.int8,
-            torch.int16,
-            torch.int32,
-            torch.int64,
-        )
         device = bounding_boxes.device
 
         # Go to float before converting to prevent precision loss in case of CXCYWHR -> XYXYXYXY and W or H is 1
@@ -605,18 +598,12 @@ def affine_rotated_bounding_boxes(bounding_boxes):
             )
 
         output = output[[2, 3, 0, 1, 6, 7, 4, 5]] if flip else output
-        if not int_dtype:
-            output = _parallelogram_to_bounding_boxes(output)
+        output = _parallelogram_to_bounding_boxes(output)
 
         output = F.convert_bounding_box_format(
             output, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format
         )
 
-        if torch.is_floating_point(output) and int_dtype:
-            # It is important to round before cast.
-            output = torch.round(output)
-
-        # For rotated boxes, it is important to cast before clamping.
         return (
             F.clamp_bounding_boxes(
                 output.to(dtype=dtype, device=device),
@@ -760,6 +747,8 @@ def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype,
     def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device):
         if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
             return
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
 
         bounding_boxes = make_bounding_boxes(
             format=format,
@@ -1212,6 +1201,8 @@ def test_kernel_image(self, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
         check_kernel(
             F.horizontal_flip_bounding_boxes,
@@ -1441,6 +1432,8 @@ def test_kernel_image(self, param, value, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
         self._check_kernel(
             F.affine_bounding_boxes,
@@ -1655,7 +1648,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s
             center=center,
         )
 
-        torch.testing.assert_close(actual, expected, atol=1e-5, rtol=1e-5)
+        torch.testing.assert_close(actual, expected, atol=1e-4, rtol=1e-4)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
@@ -1823,6 +1816,8 @@ def test_kernel_image(self, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
         check_kernel(
             F.vertical_flip_bounding_boxes,
@@ -2021,8 +2016,14 @@ def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
         kwargs = {param: value}
         if param != "angle":
             kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"]
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
 
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
+        if tv_tensors.is_rotated_bounding_format(format):
+            # TODO there is a 1e-6 difference between GPU and CPU outputs
+            # due to clamping. To avoid failing this test, we clamp beforehand.
+            bounding_boxes = F.clamp_bounding_boxes(bounding_boxes)
 
         check_kernel(
             F.rotate_bounding_boxes,
@@ -3236,6 +3237,8 @@ def test_kernel_image(self, param, value, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
 
         check_kernel(
@@ -3399,6 +3402,8 @@ def test_kernel_image(self, kwargs, dtype, device):
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, kwargs, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
         check_kernel(F.crop_bounding_boxes, bounding_boxes, format=format, **kwargs)
 
@@ -3576,6 +3581,8 @@ def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, w
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
 
         actual = F.crop(bounding_boxes, **kwargs)
@@ -3590,6 +3597,8 @@ def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_bounding_boxes_correctness(self, output_size, format, dtype, device, seed):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         input_size = [s * 2 for s in output_size]
         bounding_boxes = make_bounding_boxes(input_size, format=format, dtype=dtype, device=device)
 
@@ -4267,6 +4276,10 @@ def _reference_convert_bounding_box_format(self, bounding_boxes, new_format):
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn_type", ["functional", "transform"])
     def test_correctness(self, old_format, new_format, dtype, device, fn_type):
+        if not dtype.is_floating_point and (
+            tv_tensors.is_rotated_bounding_format(old_format) or tv_tensors.is_rotated_bounding_format(new_format)
+        ):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=old_format, dtype=dtype, device=device)
 
         if fn_type == "functional":
@@ -4706,6 +4719,8 @@ def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
     def test_bounding_boxes_correctness(self, padding, format, dtype, device, fn):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
 
         actual = fn(bounding_boxes, padding=padding)
@@ -4876,6 +4891,8 @@ def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
     def test_bounding_boxes_correctness(self, output_size, format, dtype, device, fn):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
 
         actual = fn(bounding_boxes, output_size)
@@ -5242,6 +5259,8 @@ def perspective_bounding_boxes(bounding_boxes):
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
 
         actual = F.perspective(bounding_boxes, startpoints=startpoints, endpoints=endpoints)
@@ -5511,6 +5530,8 @@ class TestClampBoundingBoxes:
    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel(self, format, clamping_mode, dtype, device):
+        if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
+            pytest.xfail("Rotated bounding boxes should be floating point tensors")
         bounding_boxes = make_bounding_boxes(format=format, clamping_mode=clamping_mode, dtype=dtype, device=device)
         check_kernel(
             F.clamp_bounding_boxes,
@@ -5572,9 +5593,12 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode,
 
         if rotated:
             boxes = tv_tensors.BoundingBoxes(
-                [0, 0, 100, 100, 0], format="XYWHR", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
+                [0.0, 0.0, 100.0, 100.0, 0.0],
+                format="XYWHR",
+                canvas_size=(10, 10),
+                clamping_mode=constructor_clamping_mode,
             )
-            expected_clamped_output = torch.tensor([[0, 0, 10, 10, 0]])
+            expected_clamped_output = torch.tensor([[0.0, 0.0, 10.0, 10.0, 0.0]])
         else:
             boxes = tv_tensors.BoundingBoxes(
                 [0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
@@ -6938,14 +6962,11 @@ def test_classification_preset(image_type, label_type, dataset_return_type, to_t
 
 
 @pytest.mark.parametrize("input_size", [(17, 11), (11, 17), (11, 11)])
-@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
 @pytest.mark.parametrize("device", cpu_and_cuda())
-def test_parallelogram_to_bounding_boxes(input_size, dtype, device):
+def test_parallelogram_to_bounding_boxes(input_size, device):
     # Assert that applying `_parallelogram_to_bounding_boxes` to rotated boxes
     # does not modify the input.
-    bounding_boxes = make_bounding_boxes(
-        input_size, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, dtype=dtype, device=device
-    )
+    bounding_boxes = make_bounding_boxes(input_size, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, device=device)
     actual = _parallelogram_to_bounding_boxes(bounding_boxes)
     torch.testing.assert_close(actual, bounding_boxes, rtol=0, atol=1)
 
diff --git a/test/test_tv_tensors.py b/test/test_tv_tensors.py
index bed419b312c..9fb1b9fd7ec 100644
--- a/test/test_tv_tensors.py
+++ b/test/test_tv_tensors.py
@@ -69,15 +69,39 @@ def test_bbox_instance(data, format):
 )
 @pytest.mark.parametrize("scripted", (False, True))
 def test_bbox_format(format, is_rotated_expected, scripted):
-    if isinstance(format, str):
-        format = tv_tensors.BoundingBoxFormat[(format.upper())]
-
     fn = tv_tensors.is_rotated_bounding_format
     if scripted:
         fn = torch.jit.script(fn)
     assert fn(format) == is_rotated_expected
 
 
+@pytest.mark.parametrize(
+    "format, support_integer_dtype",
+    [
+        ("XYXY", True),
+        ("XYWH", True),
+        ("CXCYWH", True),
+        ("XYXYXYXY", False),
+        ("XYWHR", False),
+        ("CXCYWHR", False),
+        (tv_tensors.BoundingBoxFormat.XYXY, True),
+        (tv_tensors.BoundingBoxFormat.XYWH, True),
+        (tv_tensors.BoundingBoxFormat.CXCYWH, True),
+        (tv_tensors.BoundingBoxFormat.XYXYXYXY, False),
+        (tv_tensors.BoundingBoxFormat.XYWHR, False),
+        (tv_tensors.BoundingBoxFormat.CXCYWHR, False),
+    ],
+)
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
+def test_bbox_format_dtype(format, support_integer_dtype, input_dtype):
+    tensor = torch.randint(0, 32, size=(5, 2), dtype=input_dtype)
+    if not input_dtype.is_floating_point and not support_integer_dtype:
+        with pytest.raises(ValueError, match="Rotated bounding boxes should be floating point tensors"):
+            tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))
+    else:
+        tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))
+
+
 def test_bbox_dim_error():
     data_3d = [[[1, 2, 3, 4]]]
     with pytest.raises(ValueError, match="Expected a 1D or 2D tensor, got 3D"):
@@ -409,5 +433,10 @@ def test_return_type_input():
 
 
 def test_box_clamping_mode_default():
-    assert tv_tensors.BoundingBoxes([0, 0, 10, 10], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
-    assert tv_tensors.BoundingBoxes([0, 0, 10, 10, 0], format="XYWHR", canvas_size=(100, 100)).clamping_mode == "soft"
+    assert (
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
+    )
+    assert (
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0, 0.0], format="XYWHR", canvas_size=(100, 100)).clamping_mode
+        == "soft"
+    )
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
index f109247dc6b..1c9ce3f6df0 100644
--- a/torchvision/transforms/v2/functional/_geometry.py
+++ b/torchvision/transforms/v2/functional/_geometry.py
@@ -104,16 +104,10 @@ def horizontal_flip_bounding_boxes(
         bounding_boxes[:, 0::2].sub_(canvas_size[1]).neg_()
         bounding_boxes = bounding_boxes[:, [2, 3, 0, 1, 6, 7, 4, 5]]
     elif format == tv_tensors.BoundingBoxFormat.XYWHR:
-
-        dtype = bounding_boxes.dtype
-        if not torch.is_floating_point(bounding_boxes):
-            # Casting to float to support cos and sin computations.
-            bounding_boxes = bounding_boxes.to(torch.float32)
         angle_rad = bounding_boxes[:, 4].mul(torch.pi).div(180)
         bounding_boxes[:, 0].add_(bounding_boxes[:, 2].mul(angle_rad.cos())).sub_(canvas_size[1]).neg_()
         bounding_boxes[:, 1].sub_(bounding_boxes[:, 2].mul(angle_rad.sin()))
         bounding_boxes[:, 4].neg_()
-        bounding_boxes = bounding_boxes.to(dtype)
     else:  # format == tv_tensors.BoundingBoxFormat.CXCYWHR:
         bounding_boxes[:, 0].sub_(canvas_size[1]).neg_()
         bounding_boxes[:, 4].neg_()
@@ -192,15 +186,10 @@ def vertical_flip_bounding_boxes(
         bounding_boxes[:, 1::2].sub_(canvas_size[0]).neg_()
         bounding_boxes = bounding_boxes[:, [2, 3, 0, 1, 6, 7, 4, 5]]
     elif format == tv_tensors.BoundingBoxFormat.XYWHR:
-        dtype = bounding_boxes.dtype
-        if not torch.is_floating_point(bounding_boxes):
-            # Casting to float to support cos and sin computations.
-            bounding_boxes = bounding_boxes.to(torch.float64)
         angle_rad = bounding_boxes[:, 4].mul(torch.pi).div(180)
         bounding_boxes[:, 1].sub_(bounding_boxes[:, 2].mul(angle_rad.sin())).sub_(canvas_size[0]).neg_()
         bounding_boxes[:, 0].add_(bounding_boxes[:, 2].mul(angle_rad.cos()))
         bounding_boxes[:, 4].neg_().add_(180)
-        bounding_boxes = bounding_boxes.to(dtype)
     else:  # format == tv_tensors.BoundingBoxFormat.CXCYWHR:
         bounding_boxes[:, 1].sub_(canvas_size[0]).neg_()
         bounding_boxes[:, 4].neg_().add_(180)
@@ -462,19 +451,6 @@ def _parallelogram_to_bounding_boxes(parallelogram: torch.Tensor) -> torch.Tenso
         torch.Tensor: Tensor of same shape as input containing the rectangle coordinates.
                       The output maintains the same dtype as the input.
     """
-    dtype = parallelogram.dtype
-    int_dtype = dtype in (
-        torch.uint8,
-        torch.int8,
-        torch.int16,
-        torch.int32,
-        torch.int64,
-    )
-    if int_dtype:
-        # Does not apply the transformation to `int` boxes as the rounding error
-        # will typically not ensure the resulting box has a rectangular shape.
-        return parallelogram.clone()
-
     out_boxes = parallelogram.clone()
 
     # Calculate parallelogram diagonal vectors
@@ -499,8 +475,8 @@ def _parallelogram_to_bounding_boxes(parallelogram: torch.Tensor) -> torch.Tenso
         diag24 * torch.abs(torch.sin(torch.atan2(dx42, dy42) - r_rad)),
     )
 
-    delta_x = torch.round(w * cos).to(dtype) if int_dtype else w * cos
-    delta_y = torch.round(w * sin).to(dtype) if int_dtype else w * sin
+    delta_x = w * cos
+    delta_y = w * sin
     # Update coordinates to form a rectangle
     # Keeping the points (x1, y1) and (x3, y3) unchanged.
     out_boxes[..., 2] = torch.where(mask, parallelogram[..., 0] + delta_x, parallelogram[..., 2])
@@ -1115,9 +1091,8 @@ def _affine_bounding_boxes_with_expand(
 
     original_shape = bounding_boxes.shape
     dtype = bounding_boxes.dtype
-    acceptable_dtypes = [torch.float64]  # Ensure consistency between CPU and GPU.
-    need_cast = dtype not in acceptable_dtypes
-    bounding_boxes = bounding_boxes.to(torch.float64) if need_cast else bounding_boxes.clone()
+    need_cast = not bounding_boxes.is_floating_point()
+    bounding_boxes = bounding_boxes.float() if need_cast else bounding_boxes.clone()
     device = bounding_boxes.device
     is_rotated = tv_tensors.is_rotated_bounding_format(format)
     intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY if is_rotated else tv_tensors.BoundingBoxFormat.XYXY
@@ -1196,8 +1171,6 @@ def _affine_bounding_boxes_with_expand(
     ).reshape(original_shape)
 
     if need_cast:
-        if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
-            out_bboxes.round_()
         out_bboxes = out_bboxes.to(dtype)
 
     return out_bboxes, canvas_size
diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py
index 2370fe72fca..434a0c79b15 100644
--- a/torchvision/transforms/v2/functional/_meta.py
+++ b/torchvision/transforms/v2/functional/_meta.py
@@ -540,10 +540,6 @@ def _clamp_along_y_axis(
     Returns:
        torch.Tensor: The adjusted bounding boxes.
     """
-    dtype = bounding_boxes.dtype
-    acceptable_dtypes = [torch.float64]  # Ensure consistency between CPU and GPU.
-    need_cast = dtype not in acceptable_dtypes
-    eps = 1e-06  # Ensure consistency between CPU and GPU.
     original_shape = bounding_boxes.shape
     bounding_boxes = bounding_boxes.reshape(-1, 8)
     original_bounding_boxes = original_bounding_boxes.reshape(-1, 8)
@@ -559,28 +555,17 @@ def _clamp_along_y_axis(
     case_b[..., 6].clamp_(0)  # Clamp x4 to 0
     case_c = torch.zeros_like(case_b)
 
-    cond_a = (x1 < eps) & ~case_a.isnan().any(-1)  # First point is outside left boundary
-    cond_b = y1.isclose(y2, rtol=eps, atol=eps) | y3.isclose(y4, rtol=eps, atol=eps)  # First line is nearly vertical
+    cond_a = (x1 < 0) & ~case_a.isnan().any(-1)  # First point is outside left boundary
+    cond_b = y1.isclose(y2) | y3.isclose(y4)  # First line is nearly vertical
 
     cond_c = (x1 <= 0) & (x2 <= 0) & (x3 <= 0) & (x4 <= 0)  # All points outside left boundary
-    cond_c = (
-        cond_c
-        | y1.isclose(y4, rtol=eps, atol=eps)
-        | y2.isclose(y3, rtol=eps, atol=eps)
-        | (cond_b & x1.isclose(x2, rtol=eps, atol=eps))
-    )  # First line is nearly horizontal
+    cond_c = cond_c | y1.isclose(y4) | y2.isclose(y3) | (cond_b & x1.isclose(x2))  # First line is nearly horizontal
     for (cond, case) in zip(
         [cond_a, cond_b, cond_c],
         [case_a, case_b, case_c],
     ):
         bounding_boxes = torch.where(cond.unsqueeze(1).repeat(1, 8), case.reshape(-1, 8), bounding_boxes)
 
-    if clamping_mode is not None and clamping_mode == "hard":
-        bounding_boxes[..., 0].clamp_(0)  # Clamp x1 to 0
-    if need_cast:
-        if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
-            bounding_boxes.round_()
-        bounding_boxes = bounding_boxes.to(dtype)
     return bounding_boxes.reshape(original_shape)
 
 
@@ -613,10 +598,7 @@ def _clamp_rotated_bounding_boxes(
     if clamping_mode is None:
         return bounding_boxes.clone()
     original_shape = bounding_boxes.shape
-    dtype = bounding_boxes.dtype
-    acceptable_dtypes = [torch.float64]  # Ensure consistency between CPU and GPU.
-    need_cast = dtype not in acceptable_dtypes
-    bounding_boxes = bounding_boxes.to(torch.float64) if need_cast else bounding_boxes.clone()
+    bounding_boxes = bounding_boxes.clone()
     out_boxes = (
         convert_bounding_box_format(
             bounding_boxes, old_format=format, new_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, inplace=True
@@ -645,11 +627,6 @@ def _clamp_rotated_bounding_boxes(
         out_boxes, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format, inplace=True
     ).reshape(original_shape)
 
-    if need_cast:
-        if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
-            # Adding epsilon to ensure consistency between CPU and GPU rounding.
-            out_boxes.add_(1e-7).round_()
-        out_boxes = out_boxes.to(dtype)
-
     return out_boxes
diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py
index 4ad6d978bfb..e3c1032d0de 100644
--- a/torchvision/tv_tensors/_bounding_boxes.py
+++ b/torchvision/tv_tensors/_bounding_boxes.py
@@ -40,10 +40,17 @@ class BoundingBoxFormat(Enum):
 
 # TODO: Once torchscript supports Enums with staticmethod
 # this can be put into BoundingBoxFormat as staticmethod
-def is_rotated_bounding_format(format: BoundingBoxFormat) -> bool:
-    return (
-        format == BoundingBoxFormat.XYWHR or format == BoundingBoxFormat.CXCYWHR or format == BoundingBoxFormat.XYXYXYXY
-    )
+def is_rotated_bounding_format(format: BoundingBoxFormat | str) -> bool:
+    if isinstance(format, BoundingBoxFormat):
+        return (
+            format == BoundingBoxFormat.XYWHR
+            or format == BoundingBoxFormat.CXCYWHR
+            or format == BoundingBoxFormat.XYXYXYXY
+        )
+    elif isinstance(format, str):
+        return format in ("XYWHR", "CXCYWHR", "XYXYXYXY")
+    else:
+        raise ValueError(f"format should be str or BoundingBoxFormat, got {type(format)}")
 
 
 # TODOBB consider making this a Literal instead. Tried briefly and got
@@ -110,6 +117,8 @@ def __new__(
         requires_grad: bool | None = None,
     ) -> BoundingBoxes:
         tensor = cls._to_tensor(data, dtype=dtype, device=device, requires_grad=requires_grad)
+        if not torch.is_floating_point(tensor) and is_rotated_bounding_format(format):
+            raise ValueError(f"Rotated bounding boxes should be floating point tensors, got {tensor.dtype}.")
         return cls._wrap(tensor, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode)
 
     @classmethod
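
Illustration of the behavior this patch enforces (a minimal sketch, not part of the diff itself; the tensor values and variable names are only examples, and the error message is taken from the change to torchvision/tv_tensors/_bounding_boxes.py above):

    import torch
    from torchvision import tv_tensors

    # Axis-aligned formats still accept integer boxes.
    tv_tensors.BoundingBoxes(
        torch.tensor([[0, 0, 10, 10]], dtype=torch.int64), format="XYXY", canvas_size=(32, 32)
    )

    # Rotated formats must be floating point; integer inputs now raise at construction time.
    try:
        tv_tensors.BoundingBoxes(
            torch.tensor([[0, 0, 10, 10, 45]], dtype=torch.int64), format="XYWHR", canvas_size=(32, 32)
        )
    except ValueError as err:
        print(err)  # Rotated bounding boxes should be floating point tensors, got torch.int64.

    # is_rotated_bounding_format now also accepts plain format strings.
    assert tv_tensors.is_rotated_bounding_format("CXCYWHR")
    assert not tv_tensors.is_rotated_bounding_format(tv_tensors.BoundingBoxFormat.XYWH)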