Skip to content

Commit 0e1caea

Browse files
committed
Address bbox to keypoint conversion
1 parent 211acf2 commit 0e1caea

File tree

5 files changed

+47
-55
lines changed

5 files changed

+47
-55
lines changed

docs/source/transforms.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ functionals
459459
v2.functional.to_pil_image
460460
v2.functional.to_dtype
461461
v2.functional.convert_bounding_box_format
462+
v2.functional.convert_bounding_boxes_to_keypoints
462463

463464

464465
Deprecated

test/test_transforms_v2.py

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6008,7 +6008,9 @@ def test_transform(self, make_input, dtype, device):
60086008
"will degenerate to that anyway."
60096009
)
60106010

6011-
torch.manual_seed(1) # TODOKP why is this needed now??
6011+
# TODO: seeding became necessary after the KeyPoints PR; the cause is unknown.
6012+
# The failure it works around wasn't really significant anyway.
6013+
torch.manual_seed(1)
60126014
check_transform(
60136015
transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.25),
60146016
make_input(dtype=dtype, device=device),
@@ -7194,36 +7196,35 @@ def test_no_valid_input(self, query):
71947196
with pytest.raises(TypeError, match="No image"):
71957197
query(["blah"])
71967198

7197-
# TODOKP this is tested here in TestUtils but defined in meta
7198-
@pytest.mark.parametrize(
7199-
"boxes",
7200-
[
7201-
tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 2.0, 2.0]]), format="XYXY", canvas_size=(4, 4)),
7202-
tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0]]), format="XYWH", canvas_size=(4, 4)),
7203-
tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0]]), format="CXCYWH", canvas_size=(4, 4)),
7204-
tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0, 45]]), format="CXCYWHR", canvas_size=(4, 4)),
7205-
tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0, 45.0]]), format="XYWHR", canvas_size=(4, 4)),
7206-
tv_tensors.BoundingBoxes(
7207-
torch.tensor([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0]]), format="XYXYXYXY", canvas_size=(4, 4)
7208-
),
7209-
],
7210-
)
7211-
def test_convert_bounding_boxes_to_points(self, boxes: tv_tensors.BoundingBoxes):
7212-
kp = F.convert_bounding_boxes_to_points(boxes)
7213-
assert kp.shape == (boxes.shape[0], 4, 2)
7214-
assert kp.dtype == boxes.dtype
7215-
7216-
# We manually convert the kp back into a BoundingBoxes, and convert that
7217-
# bbox back into the original `boxes` format to compare against it.
7218-
if F._meta.is_rotated_bounding_box_format(boxes.format):
7219-
reconverted = kp.reshape(-1, 8)
7220-
intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY
7221-
else:
7222-
reconverted = torch.cat([kp[..., 0, :], kp[..., 2, :]], dim=-1)
7223-
intermediate_format = tv_tensors.BoundingBoxFormat.XYXY
7199+
@pytest.mark.parametrize(
7200+
"boxes",
7201+
[
7202+
tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 2.0, 2.0]]), format="XYXY", canvas_size=(4, 4)),
7203+
tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0]]), format="XYWH", canvas_size=(4, 4)),
7204+
tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0]]), format="CXCYWH", canvas_size=(4, 4)),
7205+
tv_tensors.BoundingBoxes(torch.tensor([[1.5, 1.5, 1.0, 1.0, 45]]), format="CXCYWHR", canvas_size=(4, 4)),
7206+
tv_tensors.BoundingBoxes(torch.tensor([[1.0, 1.0, 1.0, 1.0, 45.0]]), format="XYWHR", canvas_size=(4, 4)),
7207+
tv_tensors.BoundingBoxes(
7208+
torch.tensor([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0]]), format="XYXYXYXY", canvas_size=(4, 4)
7209+
),
7210+
],
7211+
)
7212+
def test_convert_bounding_boxes_to_keypoints(boxes: tv_tensors.BoundingBoxes):
7213+
kp = F.convert_bounding_boxes_to_keypoints(boxes)
7214+
assert kp.shape == (boxes.shape[0], 4, 2)
7215+
assert kp.dtype == boxes.dtype
7216+
7217+
# We manually convert the kp back into a BoundingBoxes, and convert that
7218+
# bbox back into the original `boxes` format to compare against it.
7219+
if F._meta.is_rotated_bounding_box_format(boxes.format):
7220+
reconverted = kp.reshape(-1, 8)
7221+
intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY
7222+
else:
7223+
reconverted = torch.cat([kp[..., 0, :], kp[..., 2, :]], dim=-1)
7224+
intermediate_format = tv_tensors.BoundingBoxFormat.XYXY
72247225

7225-
reconverted_bbox = F.convert_bounding_box_format(
7226-
tv_tensors.BoundingBoxes(reconverted, format=intermediate_format, canvas_size=kp.canvas_size),
7227-
new_format=boxes.format,
7228-
)
7229-
assert_equal(reconverted_bbox, boxes, atol=1e-5, rtol=0)
7226+
reconverted_bbox = F.convert_bounding_box_format(
7227+
tv_tensors.BoundingBoxes(reconverted, format=intermediate_format, canvas_size=kp.canvas_size),
7228+
new_format=boxes.format,
7229+
)
7230+
assert_equal(reconverted_bbox, boxes, atol=1e-5, rtol=0)

torchvision/transforms/v2/functional/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
clamp_bounding_boxes,
77
clamp_keypoints,
88
convert_bounding_box_format,
9-
convert_bounding_boxes_to_points, #TODOKP also needs docs
9+
convert_bounding_boxes_to_keypoints,
1010
get_dimensions_image,
1111
get_dimensions_video,
1212
get_dimensions,

torchvision/transforms/v2/functional/_geometry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def horizontal_flip_keypoints(keypoints: torch.Tensor, canvas_size: tuple[int, i
7070
shape = keypoints.shape
7171
keypoints = keypoints.clone().reshape(-1, 2)
7272
keypoints[..., 0] = keypoints[..., 0].sub_(canvas_size[1]).neg_()
73-
return keypoints.reshape(shape)
73+
return clamp_keypoints(keypoints.reshape(shape), canvas_size=canvas_size)
7474

7575

7676
@_register_kernel_internal(horizontal_flip, tv_tensors.KeyPoints, tv_tensor_wrapper=False)
@@ -164,7 +164,7 @@ def vertical_flip_keypoints(keypoints: torch.Tensor, canvas_size: tuple[int, int
164164
shape = keypoints.shape
165165
keypoints = keypoints.clone().reshape(-1, 2)
166166
keypoints[..., 1] = keypoints[..., 1].sub_(canvas_size[0]).neg_()
167-
return keypoints.reshape(shape)
167+
return clamp_keypoints(keypoints.reshape(shape), canvas_size=canvas_size)
168168

169169

170170
def vertical_flip_bounding_boxes(

torchvision/transforms/v2/functional/_meta.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -189,40 +189,30 @@ def _xyxyxyxy_to_keypoints(bounding_boxes: torch.Tensor) -> torch.Tensor:
189189
return bounding_boxes[:, [[0, 1], [2, 3], [4, 5], [6, 7]]]
190190

191191

192-
# TODOKP Should this be in the box ops? Or in utils? rename points->keypoints.
193-
def convert_bounding_boxes_to_points(bounding_boxes: tv_tensors.BoundingBoxes) -> tv_tensors.KeyPoints:
192+
# Note: this doesn't have a corresponding transforms class.
193+
def convert_bounding_boxes_to_keypoints(bounding_boxes: tv_tensors.BoundingBoxes) -> tv_tensors.KeyPoints:
194194
"""Convert a set of bounding boxes to their corner points.
195195
196-
.. note::
197-
198-
This handles rotated :class:`tv_tensors.BoundingBoxes` formats
199-
by first converting them to XYXYXYXY format.
200-
201-
Due to floating-point approximation, this may not be an exact computation.
202-
203196
Args:
204197
bounding_boxes (tv_tensors.BoundingBoxes): A set of ``N`` bounding boxes (of shape ``[N, 4]``, or ``[N, 5]`` / ``[N, 8]`` for rotated formats)
205198
206199
Returns:
207200
tv_tensors.KeyPoints: The four corners of each box, as a polygon of shape ``[N, 4, 2]``
208201
"""
209202
if is_rotated_bounding_box_format(bounding_boxes.format):
210-
# We are working on a rotated bounding box
211-
bbox = _convert_bounding_box_format(
212-
bounding_boxes.as_subclass(torch.Tensor),
213-
old_format=bounding_boxes.format,
214-
new_format=BoundingBoxFormat.XYXYXYXY,
215-
inplace=False,
216-
)
217-
return tv_tensors.KeyPoints(_xyxyxyxy_to_keypoints(bbox), canvas_size=bounding_boxes.canvas_size)
203+
intermediate_format = BoundingBoxFormat.XYXYXYXY
204+
to_keypoints = _xyxyxyxy_to_keypoints
205+
else:
206+
intermediate_format = BoundingBoxFormat.XYXY
207+
to_keypoints = _xyxy_to_keypoints
218208

219209
bbox = _convert_bounding_box_format(
220210
bounding_boxes.as_subclass(torch.Tensor),
221211
old_format=bounding_boxes.format,
222-
new_format=BoundingBoxFormat.XYXY,
212+
new_format=intermediate_format,
223213
inplace=False,
224214
)
225-
return tv_tensors.KeyPoints(_xyxy_to_keypoints(bbox), canvas_size=bounding_boxes.canvas_size)
215+
return tv_tensors.KeyPoints(to_keypoints(bbox), canvas_size=bounding_boxes.canvas_size)
226216

227217

228218
def _cxcywhr_to_xywhr(cxcywhr: torch.Tensor, inplace: bool) -> torch.Tensor:

0 commit comments

Comments
 (0)