Skip to content
Merged
23 changes: 8 additions & 15 deletions test/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,13 +424,6 @@ def sample_position(values, max_value):
format = tv_tensors.BoundingBoxFormat[format]

dtype = dtype or torch.float32
int_dtype = dtype in (
torch.uint8,
torch.int8,
torch.int16,
torch.int32,
torch.int64,
)

h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
y = sample_position(h, canvas_size[0])
Expand All @@ -457,14 +450,14 @@ def sample_position(values, max_value):
elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
r_rad = r * torch.pi / 180.0
cos, sin = torch.cos(r_rad), torch.sin(r_rad)
x1 = torch.round(x) if int_dtype else x
y1 = torch.round(y) if int_dtype else y
x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
x1 = x
y1 = y
x2 = x1 + w * cos
y2 = y1 - w * sin
x3 = x2 + h * sin
y3 = y2 + h * cos
x4 = x1 + h * sin
y4 = y1 + h * cos
parts = (x1, y1, x2, y2, x3, y3, x4, y4)
else:
raise ValueError(f"Format {format} is not supported")
Expand Down
65 changes: 43 additions & 22 deletions test/test_transforms_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,13 +564,6 @@ def reference_affine_rotated_bounding_boxes_helper(

def affine_rotated_bounding_boxes(bounding_boxes):
dtype = bounding_boxes.dtype
int_dtype = dtype in (
torch.uint8,
torch.int8,
torch.int16,
torch.int32,
torch.int64,
)
device = bounding_boxes.device

# Go to float before converting to prevent precision loss in case of CXCYWHR -> XYXYXYXY and W or H is 1
Expand Down Expand Up @@ -605,18 +598,12 @@ def affine_rotated_bounding_boxes(bounding_boxes):
)

output = output[[2, 3, 0, 1, 6, 7, 4, 5]] if flip else output
if not int_dtype:
output = _parallelogram_to_bounding_boxes(output)
output = _parallelogram_to_bounding_boxes(output)

output = F.convert_bounding_box_format(
output, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format
)

if torch.is_floating_point(output) and int_dtype:
# It is important to round before cast.
output = torch.round(output)

# For rotated boxes, it is important to cast before clamping.
return (
F.clamp_bounding_boxes(
output.to(dtype=dtype, device=device),
Expand Down Expand Up @@ -760,6 +747,8 @@ def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype,
def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device):
if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
return
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")

bounding_boxes = make_bounding_boxes(
format=format,
Expand Down Expand Up @@ -1212,6 +1201,8 @@ def test_kernel_image(self, dtype, device):
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
check_kernel(
F.horizontal_flip_bounding_boxes,
Expand Down Expand Up @@ -1441,6 +1432,8 @@ def test_kernel_image(self, param, value, dtype, device):
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
self._check_kernel(
F.affine_bounding_boxes,
Expand Down Expand Up @@ -1655,7 +1648,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s
center=center,
)

torch.testing.assert_close(actual, expected, atol=1e-5, rtol=1e-5)
torch.testing.assert_close(actual, expected, atol=1e-4, rtol=1e-4)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
Expand Down Expand Up @@ -1823,6 +1816,8 @@ def test_kernel_image(self, dtype, device):
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
check_kernel(
F.vertical_flip_bounding_boxes,
Expand Down Expand Up @@ -2021,8 +2016,14 @@ def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
kwargs = {param: value}
if param != "angle":
kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"]
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")

bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
if tv_tensors.is_rotated_bounding_format(format):
# TODO: there is a 1e-6 difference between GPU and CPU outputs
# due to clamping. To avoid failing this test, we clamp beforehand.
Comment on lines +2024 to +2025
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for writing this TODO. For GPU vs. CPU, it's typically OK to have differences of up to 1e-4. We should be able to pass `atol` and `rtol` to the `check_kernel` call, but we can address that later.

bounding_boxes = F.clamp_bounding_boxes(bounding_boxes)

check_kernel(
F.rotate_bounding_boxes,
Expand Down Expand Up @@ -3236,6 +3237,8 @@ def test_kernel_image(self, param, value, dtype, device):
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

check_kernel(
Expand Down Expand Up @@ -3399,6 +3402,8 @@ def test_kernel_image(self, kwargs, dtype, device):
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, kwargs, format, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
check_kernel(F.crop_bounding_boxes, bounding_boxes, format=format, **kwargs)

Expand Down Expand Up @@ -3576,6 +3581,8 @@ def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, w
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)

actual = F.crop(bounding_boxes, **kwargs)
Expand All @@ -3590,6 +3597,8 @@ def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("seed", list(range(5)))
def test_transform_bounding_boxes_correctness(self, output_size, format, dtype, device, seed):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
input_size = [s * 2 for s in output_size]
bounding_boxes = make_bounding_boxes(input_size, format=format, dtype=dtype, device=device)

Expand Down Expand Up @@ -4267,6 +4276,10 @@ def _reference_convert_bounding_box_format(self, bounding_boxes, new_format):
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("fn_type", ["functional", "transform"])
def test_correctness(self, old_format, new_format, dtype, device, fn_type):
if not dtype.is_floating_point and (
tv_tensors.is_rotated_bounding_format(old_format) or tv_tensors.is_rotated_bounding_format(new_format)
):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=old_format, dtype=dtype, device=device)

if fn_type == "functional":
Expand Down Expand Up @@ -4706,6 +4719,8 @@ def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
def test_bounding_boxes_correctness(self, padding, format, dtype, device, fn):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

actual = fn(bounding_boxes, padding=padding)
Expand Down Expand Up @@ -4876,6 +4891,8 @@ def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
def test_bounding_boxes_correctness(self, output_size, format, dtype, device, fn):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)

actual = fn(bounding_boxes, output_size)
Expand Down Expand Up @@ -5242,6 +5259,8 @@ def perspective_bounding_boxes(bounding_boxes):
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

actual = F.perspective(bounding_boxes, startpoints=startpoints, endpoints=endpoints)
Expand Down Expand Up @@ -5511,6 +5530,8 @@ class TestClampBoundingBoxes:
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel(self, format, clamping_mode, dtype, device):
if not dtype.is_floating_point and tv_tensors.is_rotated_bounding_format(format):
pytest.xfail("Rotated bounding boxes should be floating point tensors")
bounding_boxes = make_bounding_boxes(format=format, clamping_mode=clamping_mode, dtype=dtype, device=device)
check_kernel(
F.clamp_bounding_boxes,
Expand Down Expand Up @@ -5572,9 +5593,12 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode,

if rotated:
boxes = tv_tensors.BoundingBoxes(
[0, 0, 100, 100, 0], format="XYWHR", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
[0.0, 0.0, 100.0, 100.0, 0.0],
format="XYWHR",
canvas_size=(10, 10),
clamping_mode=constructor_clamping_mode,
)
expected_clamped_output = torch.tensor([[0, 0, 10, 10, 0]])
expected_clamped_output = torch.tensor([[0.0, 0.0, 10.0, 10.0, 0.0]])
else:
boxes = tv_tensors.BoundingBoxes(
[0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
Expand Down Expand Up @@ -6938,14 +6962,11 @@ def test_classification_preset(image_type, label_type, dataset_return_type, to_t


@pytest.mark.parametrize("input_size", [(17, 11), (11, 17), (11, 11)])
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_parallelogram_to_bounding_boxes(input_size, dtype, device):
def test_parallelogram_to_bounding_boxes(input_size, device):
# Assert that applying `_parallelogram_to_bounding_boxes` to rotated boxes
# does not modify the input.
bounding_boxes = make_bounding_boxes(
input_size, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, dtype=dtype, device=device
)
bounding_boxes = make_bounding_boxes(input_size, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, device=device)
actual = _parallelogram_to_bounding_boxes(bounding_boxes)
torch.testing.assert_close(actual, bounding_boxes, rtol=0, atol=1)

Expand Down
39 changes: 34 additions & 5 deletions test/test_tv_tensors.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,39 @@ def test_bbox_instance(data, format):
)
@pytest.mark.parametrize("scripted", (False, True))
def test_bbox_format(format, is_rotated_expected, scripted):
if isinstance(format, str):
format = tv_tensors.BoundingBoxFormat[(format.upper())]

fn = tv_tensors.is_rotated_bounding_format
if scripted:
fn = torch.jit.script(fn)
assert fn(format) == is_rotated_expected


@pytest.mark.parametrize(
    "format, support_integer_dtype",
    [
        ("XYXY", True),
        ("XYWH", True),
        ("CXCYWH", True),
        ("XYXYXYXY", False),
        ("XYWHR", False),
        ("CXCYWHR", False),
        (tv_tensors.BoundingBoxFormat.XYXY, True),
        (tv_tensors.BoundingBoxFormat.XYWH, True),
        (tv_tensors.BoundingBoxFormat.CXCYWH, True),
        (tv_tensors.BoundingBoxFormat.XYXYXYXY, False),
        (tv_tensors.BoundingBoxFormat.XYWHR, False),
        (tv_tensors.BoundingBoxFormat.CXCYWHR, False),
    ],
)
@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
def test_bbox_format_dtype(format, support_integer_dtype, input_dtype):
    """Check which (format, dtype) pairs ``BoundingBoxes`` accepts at construction.

    Each format is given both as a string and as a ``BoundingBoxFormat`` member.
    When ``support_integer_dtype`` is False and ``input_dtype`` is not a floating
    point dtype, construction is expected to raise ``ValueError``; every other
    combination is expected to construct without raising.
    """
    data = torch.randint(0, 32, size=(5, 2), dtype=input_dtype)
    dtype_is_accepted = input_dtype.is_floating_point or support_integer_dtype

    if dtype_is_accepted:
        # Happy path: constructing the tv_tensor must simply not raise.
        tv_tensors.BoundingBoxes(data, format=format, canvas_size=(32, 32))
        return

    # Integer data with a format flagged as not supporting integer dtypes.
    with pytest.raises(ValueError, match="Rotated bounding boxes should be floating point tensors"):
        tv_tensors.BoundingBoxes(data, format=format, canvas_size=(32, 32))


def test_bbox_dim_error():
data_3d = [[[1, 2, 3, 4]]]
with pytest.raises(ValueError, match="Expected a 1D or 2D tensor, got 3D"):
Expand Down Expand Up @@ -409,5 +433,10 @@ def test_return_type_input():


def test_box_clamping_mode_default():
assert tv_tensors.BoundingBoxes([0, 0, 10, 10], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
assert tv_tensors.BoundingBoxes([0, 0, 10, 10, 0], format="XYWHR", canvas_size=(100, 100)).clamping_mode == "soft"
assert (
tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
)
assert (
tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0, 0.0], format="XYWHR", canvas_size=(100, 100)).clamping_mode
== "soft"
)
Loading
Loading