
Commit b818d32

Add more rotated boxes docs (#9144)
1 parent c50f694 commit b818d32

File tree: 4 files changed (+143, -83 lines)


gallery/transforms/plot_rotated_box_transforms.py

Lines changed: 114 additions & 75 deletions
@@ -3,10 +3,13 @@
 Transforms on Rotated Bounding Boxes
 ===============================================================
 
-This example illustrates how to define and use rotated bounding boxes. We'll
-cover how to define them, demonstrate their usage with some of the existing
-transforms, and finally some of their unique behavior in comparision to
-standard bounding boxes.
+This example illustrates how to define and use rotated bounding boxes.
+
+.. note::
+    Support for rotated bounding boxes was released in TorchVision 0.23 and is
+    currently a BETA feature. We don't expect the API to change, but there may
+    be some rare edge-cases. If you find any issues, please report them on
+    our bug tracker: https://github.com/pytorch/vision/issues?q=is:open+is:issue
 
 First, a bit of setup code:
 """
@@ -18,7 +21,7 @@
 
 
 import torch
-from torchvision import tv_tensors
+from torchvision.tv_tensors import BoundingBoxes
 from torchvision.transforms import v2
 from helpers import plot
 
@@ -37,16 +40,16 @@
 # Creating a Rotated Bounding Box
 # -------------------------------
 # Rotated bounding boxes are created by instantiating the
-# :class:`~torchvision.tv_tensors.BoundingBoxes` class. It's the `format`
+# :class:`~torchvision.tv_tensors.BoundingBoxes` class. It's the ``format``
 # parameter of the constructor that determines if a bounding box is rotated or
-# not. In this instance, we use the
-# :attr:`~torchvision.tv_tensors.BoundingBoxFormat` kind `CXCYWHR`. The first
-# two values are the `x` and `y` coordinates of the center of the bounding box.
-# The next two values are the `width` and `height` of the bounding box, and the
-# last value is the `rotation` of the bounding box.
+# not. In this instance, we use the CXCYWHR
+# :attr:`~torchvision.tv_tensors.BoundingBoxFormat`. The first two values are
+# the X and Y coordinates of the center of the bounding box. The next two
+# values are the width and height of the bounding box, and the last value is the
+# rotation of the bounding box, in degrees.
 
 
-orig_box = tv_tensors.BoundingBoxes(
+orig_box = BoundingBoxes(
     [
         [860.0, 1100, 570, 1840, -7],
     ],
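For readers skimming the diff, here is a minimal, self-contained sketch of what the CXCYWHR description above corresponds to in practice. The canvas size and the conversion to the corner-based XYXYXYXY format are illustrative assumptions, not part of this commit:

    from torchvision.tv_tensors import BoundingBoxes
    from torchvision.transforms import v2

    # One rotated box: center (860, 1100), width 570, height 1840, rotated by -7 degrees.
    box = BoundingBoxes(
        [[860.0, 1100, 570, 1840, -7]],
        format="CXCYWHR",
        canvas_size=(2270, 1720),  # (height, width) of the image; made-up values
    )

    # Converting to the corner-based XYXYXYXY format yields 8 values per box:
    # the X/Y coordinates of the four corners.
    corners = v2.ConvertBoundingBoxFormat("XYXYXYXY")(box)
    print(corners.shape)  # expected: torch.Size([1, 8])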
@@ -57,100 +60,136 @@
 plot([(orig_img, orig_box)], bbox_width=10)
 
 # %%
-# Rotation
-# --------
-# Rotated bounding boxes maintain their rotation with respect to the image even
-# when the image itself is rotated through the
-# :class:`~torchvision.transforms.RandomRotation` transform.
+# Transforms illustrations
+# ------------------------
+#
+# Using :class:`~torchvision.transforms.RandomRotation`:
 rotater = v2.RandomRotation(degrees=(0, 180), expand=True)
 rotated_imgs = [rotater((orig_img, orig_box)) for _ in range(4)]
 plot([(orig_img, orig_box)] + rotated_imgs, bbox_width=10)
 
 # %%
-# Padding
-# -------
-# Rotated bounding boxes also maintain their properties when the image is padded using
-# :class:`~torchvision.transforms.Pad`.
+# Using :class:`~torchvision.transforms.Pad`:
 padded_imgs_and_boxes = [
     v2.Pad(padding=padding)(orig_img, orig_box)
     for padding in (30, 50, 100, 200)
 ]
 plot([(orig_img, orig_box)] + padded_imgs_and_boxes, bbox_width=10)
 
 # %%
-# Resizing
-# --------
-# Rotated bounding boxes are also resized along with an image in the
-# :class:`~torchvision.transforms.Resize` transform.
-#
-# Note that the bounding box looking bigger in the images with less pixels is
-# an artifact, not reality. That is merely the rasterised representation of the
-# bounding box's boundaries appearing bigger because we specify a fixed width of
-# that rasterized line. When the image is, say, only 30 pixels wide, a
-# line that is 3 pixels wide is relatively large.
+# Using :class:`~torchvision.transforms.Resize`:
 resized_imgs = [
     v2.Resize(size=size)(orig_img, orig_box)
     for size in (30, 50, 100, orig_img.size)
 ]
 plot([(orig_img, orig_box)] + resized_imgs, bbox_width=5)
 
 # %%
-# Perspective
-# -----------
-# The rotated bounding box is also transformed along with the image when the
-# perspective is transformed with :class:`~torchvision.transforms.RandomPerspective`.
-perspective_transformer = v2.RandomPerspective(distortion_scale=0.6, p=1.0)
-perspective_imgs = [perspective_transformer(orig_img, orig_box) for _ in range(4)]
-plot([(orig_img, orig_box)] + perspective_imgs, bbox_width=10)
-
-# %%
-# Elastic Transform
-# -----------------
-# The rotated bounding box is appropriately unchanged when going through the
-# :class:`~torchvision.transforms.ElasticTransform`.
-elastic_imgs = [
-    v2.ElasticTransform(alpha=alpha)(orig_img, orig_box)
-    for alpha in (100.0, 500.0, 1000.0, 2000.0)
-]
-plot([(orig_img, orig_box)] + elastic_imgs, bbox_width=10)
-
-# %%
-# Crop & Clamping Modes
-# ---------------------
-# The :class:`~torchvision.transforms.CenterCrop` transform selectively crops
-# the image on a center location. The behavior of the rotated bounding box
-# depends on its `clamping_mode`. We can set the `clamping_mode` in the
-# :class:`~torchvision.tv_tensors.BoundingBoxes` constructur, or by directly
-# setting it after construction as we do in the example below.
+# Note that the bounding box looking bigger in the images with less pixels is
+# an artifact, not reality. That is merely the rasterised representation of the
+# bounding box's boundaries appearing bigger because we specify a fixed width of
+# that rasterized line. When the image is, say, only 30 pixels wide, a
+# line that is 3 pixels wide is relatively large.
 #
-# There are two values for `clamping_mode`:
+# .. _clamping_mode_tuto:
 #
-# - `"soft"`: The default when constucting
-#   :class:`~torchvision.tv_tensors.BoundingBoxes`. <Insert semantic
-#   description for soft mode.>
-# - `"hard"`: <Insert semantic description for hard mode.>
+# Clamping Mode, and its effect on transforms
+# -------------------------------------------
 #
-# For standard bounding boxes, both modes behave the same. We also need to
-# document:
+# Some transforms, such as :class:`~torchvision.transforms.CenterCrop`, may
+# result in having the transformed bounding box partially outside of the
+# transformed (cropped) image. In general, this may happen with most of the
+# :ref:`geometric transforms <v2_api_ref>`.
 #
-# - `clamping_mode` for individual kernels.
-# - `clamping_mode` in :class:`~torchvision.transforms.v2.ClampBoundingBoxes`.
-# - the new :class:`~torchvision.transforms.v2.SetClampingMode` transform.
+# In such cases, the bounding box is clamped to the transformed image size based
+# on its ``clamping_mode`` attribute. There are three possible values for
+# ``clamping_mode``, which determine how the box is clamped after a
+# transformation:
 #
+# - ``None``: No clamping is applied, and the bounding box may be partially
+#   outside of the image.
+# - `"hard"`: The box is clamped to the image size, such that all its corners
+#   are within the image canvas. This potentially results in a loss of
+#   information, and it can lead to unintuitive results, but it may be
+#   necessary for some applications, e.g. if the model doesn't support boxes
+#   outside of their image.
+# - `"soft"`: This is an intermediate mode between ``None`` and `"hard"`: the
+#   box is clamped, but not as strictly as in `"hard"` mode. Some box dimensions
+#   may still be outside of the image. This is the default when constructing
+#   :class:`~torchvision.tv_tensors.BoundingBoxes`.
+#
+# .. note::
+#
+#    For axis-aligned bounding boxes, the `"soft"` and `"hard"` modes behave
+#    the same, as the bounding box is always clamped to the image size.
+#
+# Let's illustrate the clamping modes with the
+# :class:`~torchvision.transforms.CenterCrop` transform:
+
 assert orig_box.clamping_mode == "soft"
-hard_box = orig_box.clone()
-hard_box.clamping_mode = "hard"
 
+box_hard_clamping = BoundingBoxes(orig_box, format=orig_box.format, canvas_size=orig_box.canvas_size, clamping_mode="hard")
+
+box_no_clamping = BoundingBoxes(orig_box, format=orig_box.format, canvas_size=orig_box.canvas_size, clamping_mode=None)
+
+crop_sizes = (800, 1200, 2000, orig_img.size)
 soft_center_crops_and_boxes = [
     v2.CenterCrop(size=size)(orig_img, orig_box)
-    for size in (800, 1200, 2000, orig_img.size)
+    for size in crop_sizes
 ]
 
 hard_center_crops_and_boxes = [
-    v2.CenterCrop(size=size)(orig_img, hard_box)
-    for size in (800, 1200, 2000, orig_img.size)
+    v2.CenterCrop(size=size)(orig_img, box_hard_clamping)
+    for size in crop_sizes
+]
+
+no_clamping_center_crops_and_boxes = [
+    v2.CenterCrop(size=size)(orig_img, box_no_clamping)
+    for size in crop_sizes
 ]
 
-plot([[(orig_img, orig_box)] + soft_center_crops_and_boxes,
-      [(orig_img, hard_box)] + hard_center_crops_and_boxes],
+plot([[(orig_img, box_hard_clamping)] + hard_center_crops_and_boxes,
+      [(orig_img, orig_box)] + soft_center_crops_and_boxes,
+      [(orig_img, box_no_clamping)] + no_clamping_center_crops_and_boxes],
      bbox_width=10)
+
+# %%
+# The plot above shows the "hard" clamping mode, then "soft", then ``None``.
+# While "soft" and ``None`` result in similar plots, they do not lead to exactly
+# the same clamped boxes. The non-clamped boxes have coordinates that extend
+# further outside of the image:
+print("boxes with soft clamping:")
+print(soft_center_crops_and_boxes)
+print()
+print("boxes with no clamping:")
+print(no_clamping_center_crops_and_boxes)
+
+# %%
+#
+# Setting the clamping mode
+# -------------------------
+#
+# The ``clamping_mode`` attribute, which determines the clamping strategy that
+# is applied to a box, can be set in different ways:
+#
+# - When constructing the bounding box with its
+#   :class:`~torchvision.tv_tensors.BoundingBoxes` constructor, as done in the example above.
+# - By directly setting the attribute on an existing instance, e.g. ``boxes.clamping_mode = "hard"``.
+# - By calling the :class:`~torchvision.transforms.v2.SetClampingMode` transform.
+#
+# Also, remember that you can always clamp the bounding box manually by
+# calling the :class:`~torchvision.transforms.v2.ClampBoundingBoxes` transform!
+# Here's an example illustrating all of these options:
+
+t = v2.Compose([
+    v2.CenterCrop(size=(800,)),  # clamps according to the current clamping_mode
+                                 # attribute, in this case set by the constructor
+    v2.SetClampingMode(None),  # sets the clamping_mode attribute for future transforms
+    v2.Pad(padding=3),  # clamps according to the current clamping_mode, i.e. ``None``
+    v2.ClampBoundingBoxes(clamping_mode="soft"),  # clamps with "soft" mode
+])
+
+out_img, out_box = t(orig_img, orig_box)
+plot([(orig_img, orig_box), (out_img, out_box)], bbox_width=10)
+
+# %%
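As a quick, standalone recap of the ways of setting ``clamping_mode`` listed above, here is a hedged sketch; the box values and canvas size are made up, and it assumes the transforms accept a bare ``BoundingBoxes`` input:

    from torchvision.transforms import v2
    from torchvision.tv_tensors import BoundingBoxes

    # 1) At construction time.
    boxes = BoundingBoxes(
        [[860.0, 1100, 570, 1840, -7]],
        format="CXCYWHR",
        canvas_size=(2270, 1720),
        clamping_mode=None,
    )

    # 2) By setting the attribute on an existing instance.
    boxes.clamping_mode = "hard"

    # 3) Via the SetClampingMode transform.
    boxes = v2.SetClampingMode("soft")(boxes)

    # And manual clamping with ClampBoundingBoxes, overriding the box's own mode.
    clamped = v2.ClampBoundingBoxes(clamping_mode="hard")(boxes)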

torchvision/transforms/v2/_meta.py

Lines changed: 12 additions & 5 deletions
@@ -27,11 +27,10 @@ def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> t
 class ClampBoundingBoxes(Transform):
     """Clamp bounding boxes to their corresponding image dimensions.
 
-    The clamping is done according to the bounding boxes' ``canvas_size`` meta-data.
-
     Args:
-        clamping_mode: TODOBB more docs. Default is None which relies on the input box' clamping_mode attribute.
-
+        clamping_mode: Default is "auto", which relies on the input box's
+            ``clamping_mode`` attribute. Read more in :ref:`clamping_mode_tuto`
+            for details on how to use this transform.
     """
 
     def __init__(self, clamping_mode: Union[CLAMPING_MODE_TYPE, str] = "auto") -> None:
@@ -57,7 +56,15 @@ def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_te
 
 
 class SetClampingMode(Transform):
-    """TODOBB"""
+    """Sets the ``clamping_mode`` attribute of the bounding boxes for future transforms.
+
+    Args:
+        clamping_mode: The clamping mode to set. Possible values are: "soft",
+            "hard", or ``None``. Read more in :ref:`clamping_mode_tuto` for
+            details on how to use this transform.
+    """
 
     def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None:
         super().__init__()
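To make the "auto" default above concrete, here is a small hedged sketch of how these two transforms might be combined in a pipeline (illustrative only, not taken from the commit):

    from torchvision.transforms import v2

    # SetClampingMode only changes the boxes' clamping_mode attribute; it does not
    # clamp anything by itself. ClampBoundingBoxes() with its "auto" default then
    # clamps according to that attribute, while an explicit clamping_mode overrides it.
    pipeline = v2.Compose([
        v2.SetClampingMode("hard"),                   # future clamping will use "hard"
        v2.ClampBoundingBoxes(),                      # "auto": follows the attribute set above
        v2.ClampBoundingBoxes(clamping_mode="soft"),  # explicit mode, ignores the attribute
    ])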

torchvision/tv_tensors/_bounding_boxes.py

Lines changed: 9 additions & 3 deletions
@@ -59,12 +59,17 @@ def is_rotated_bounding_format(format: BoundingBoxFormat | str) -> bool:
 # This should ideally be a Literal, but torchscript fails.
 CLAMPING_MODE_TYPE = Optional[str]
 
-# TODOBB All docs. Add any new API to rst files, add tutorial[s].
-
 
 class BoundingBoxes(TVTensor):
     """:class:`torch.Tensor` subclass for bounding boxes with shape ``[N, K]``.
 
+    .. note::
+        Support for rotated bounding boxes was released in TorchVision 0.23 and
+        is currently a BETA feature. We don't expect the API to change, but
+        there may be some rare edge-cases. If you find any issues, please report
+        them on our bug tracker:
+        https://github.com/pytorch/vision/issues?q=is:open+is:issue
+
     Where ``N`` is the number of bounding boxes
     and ``K`` is 4 for unrotated boxes, and 5 or 8 for rotated boxes.
 
@@ -78,7 +83,8 @@ class BoundingBoxes(TVTensor):
         data: Any data that can be turned into a tensor with :func:`torch.as_tensor`.
         format (BoundingBoxFormat, str): Format of the bounding box.
         canvas_size (two-tuple of ints): Height and width of the corresponding image or video.
-        clamping_mode: TODOBB
+        clamping_mode: The clamping mode to use when applying transforms that may result in bounding boxes
+            partially outside of the image. Possible values are: "soft", "hard", or ``None``. Read more in :ref:`clamping_mode_tuto`.
         dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from
             ``data``.
         device (torch.device, optional): Desired device of the bounding box. If omitted and ``data`` is a
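As a small illustration of the ``clamping_mode`` argument documented above, here is a sketch with made-up coordinates for an axis-aligned box, where clamping simply clips the corners to the canvas (the printed values are what I'd expect, not output copied from the library):

    from torchvision.transforms import v2
    from torchvision.tv_tensors import BoundingBoxes

    # An axis-aligned XYXY box that sticks out of a 100x100 canvas.
    boxes = BoundingBoxes(
        [[50.0, 50.0, 150.0, 150.0]],
        format="XYXY",
        canvas_size=(100, 100),
        clamping_mode="hard",
    )

    clamped = v2.ClampBoundingBoxes()(boxes)  # "auto": uses the box's own clamping_mode
    print(clamped)  # expected: [[ 50.,  50., 100., 100.]] -- clipped to the canvas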

torchvision/tv_tensors/_keypoints.py

Lines changed: 8 additions & 0 deletions
@@ -11,6 +11,14 @@
 class KeyPoints(TVTensor):
     """:class:`torch.Tensor` subclass for tensors with shape ``[..., 2]`` that represent points in an image.
 
+    .. note::
+        Support for keypoints was released in TorchVision 0.23 and is currently
+        a BETA feature. We don't expect the API to change, but there may be some
+        rare edge-cases. If you find any issues, please report them on our bug
+        tracker: https://github.com/pytorch/vision/issues?q=is:open+is:issue
+
     Each point is represented by its X and Y coordinates along the width and height dimensions, respectively.
 
     KeyPoints may represent any object that can be represented by sequences of 2D points:
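To ground the shape description in the docstring above, here is a minimal hedged sketch of constructing a ``KeyPoints`` tensor; the coordinates and canvas size are made up, and it assumes the same ``canvas_size`` keyword that ``BoundingBoxes`` uses:

    from torchvision.tv_tensors import KeyPoints

    # Four (x, y) points on an image with (height, width) == (300, 400) -> shape [4, 2].
    points = KeyPoints(
        [[10.0, 20.0], [50.0, 60.0], [120.0, 80.0], [200.0, 150.0]],
        canvas_size=(300, 400),
    )
    print(points.shape)  # expected: torch.Size([4, 2])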
