Skip to content

Commit b7fdc86

Browse files
committed
Add more rotated boxes docs
1 parent c50f694 commit b7fdc86

File tree

3 files changed

+120
-83
lines changed

3 files changed

+120
-83
lines changed

gallery/transforms/plot_rotated_box_transforms.py

Lines changed: 106 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@
33
Transforms on Rotated Bounding Boxes
44
===============================================================
55
6-
This example illustrates how to define and use rotated bounding boxes. We'll
7-
cover how to define them, demonstrate their usage with some of the existing
8-
transforms, and finally some of their unique behavior in comparision to
9-
standard bounding boxes.
6+
This example illustrates how to define and use rotated bounding boxes.
107
118
First, a bit of setup code:
129
"""
@@ -18,7 +15,7 @@
1815

1916

2017
import torch
21-
from torchvision import tv_tensors
18+
from torchvision.tv_tensors import BoundingBoxes
2219
from torchvision.transforms import v2
2320
from helpers import plot
2421

@@ -37,16 +34,16 @@
3734
# Creating a Rotated Bounding Box
3835
# -------------------------------
3936
# Rotated bounding boxes are created by instantiating the
40-
# :class:`~torchvision.tv_tensors.BoundingBoxes` class. It's the `format`
37+
# :class:`~torchvision.tv_tensors.BoundingBoxes` class. It's the ``format``
4138
# parameter of the constructor that determines if a bounding box is rotated or
42-
# not. In this instance, we use the
43-
# :attr:`~torchvision.tv_tensors.BoundingBoxFormat` kind `CXCYWHR`. The first
44-
# two values are the `x` and `y` coordinates of the center of the bounding box.
45-
# The next two values are the `width` and `height` of the bounding box, and the
46-
# last value is the `rotation` of the bounding box.
39+
# not. In this instance, we use the CXCYWHR
40+
# :attr:`~torchvision.tv_tensors.BoundingBoxFormat`. The first two values are
41+
# the X and Y coordinates of the center of the bounding box. The next two
42+
# values are the width and height of the bounding box, and the last value is the
43+
# rotation of the bounding box, in degrees.
4744

4845

49-
orig_box = tv_tensors.BoundingBoxes(
46+
orig_box = BoundingBoxes(
5047
[
5148
[860.0, 1100, 570, 1840, -7],
5249
],
@@ -57,100 +54,134 @@
5754
plot([(orig_img, orig_box)], bbox_width=10)
5855

5956
# %%
60-
# Rotation
61-
# --------
62-
# Rotated bounding boxes maintain their rotation with respect to the image even
63-
# when the image itself is rotated through the
64-
# :class:`~torchvision.transforms.RandomRotation` transform.
57+
# Transforms illustrations
58+
# ------------------------
59+
#
60+
# Using :class:`~torchvision.transforms.RandomRotation`:
6561
rotater = v2.RandomRotation(degrees=(0, 180), expand=True)
6662
rotated_imgs = [rotater((orig_img, orig_box)) for _ in range(4)]
6763
plot([(orig_img, orig_box)] + rotated_imgs, bbox_width=10)
6864

6965
# %%
70-
# Padding
71-
# -------
72-
# Rotated bounding boxes also maintain their properties when the image is padded using
73-
# :class:`~torchvision.transforms.Pad`.
66+
# Using :class:`~torchvision.transforms.Pad`:
7467
padded_imgs_and_boxes = [
7568
v2.Pad(padding=padding)(orig_img, orig_box)
7669
for padding in (30, 50, 100, 200)
7770
]
7871
plot([(orig_img, orig_box)] + padded_imgs_and_boxes, bbox_width=10)
7972

8073
# %%
81-
# Resizing
82-
# --------
83-
# Rotated bounding boxes are also resized along with an image in the
84-
# :class:`~torchvision.transforms.Resize` transform.
85-
#
86-
# Note that the bounding box looking bigger in the images with less pixels is
87-
# an artifact, not reality. That is merely the rasterised representation of the
88-
# bounding box's boundaries appearing bigger because we specify a fixed width of
89-
# that rasterized line. When the image is, say, only 30 pixels wide, a
90-
# line that is 3 pixels wide is relatively large.
74+
# Using :class:`~torchvision.transforms.Resize`:
9175
resized_imgs = [
9276
v2.Resize(size=size)(orig_img, orig_box)
9377
for size in (30, 50, 100, orig_img.size)
9478
]
9579
plot([(orig_img, orig_box)] + resized_imgs, bbox_width=5)
9680

9781
# %%
98-
# Perspective
99-
# -----------
100-
# The rotated bounding box is also transformed along with the image when the
101-
# perspective is transformed with :class:`~torchvision.transforms.RandomPerspective`.
102-
perspective_transformer = v2.RandomPerspective(distortion_scale=0.6, p=1.0)
103-
perspective_imgs = [perspective_transformer(orig_img, orig_box) for _ in range(4)]
104-
plot([(orig_img, orig_box)] + perspective_imgs, bbox_width=10)
105-
106-
# %%
107-
# Elastic Transform
108-
# -----------------
109-
# The rotated bounding box is appropriately unchanged when going through the
110-
# :class:`~torchvision.transforms.ElasticTransform`.
111-
elastic_imgs = [
112-
v2.ElasticTransform(alpha=alpha)(orig_img, orig_box)
113-
for alpha in (100.0, 500.0, 1000.0, 2000.0)
114-
]
115-
plot([(orig_img, orig_box)] + elastic_imgs, bbox_width=10)
116-
117-
# %%
118-
# Crop & Clamping Modes
119-
# ---------------------
120-
# The :class:`~torchvision.transforms.CenterCrop` transform selectively crops
121-
# the image on a center location. The behavior of the rotated bounding box
122-
# depends on its `clamping_mode`. We can set the `clamping_mode` in the
123-
# :class:`~torchvision.tv_tensors.BoundingBoxes` constructur, or by directly
124-
# setting it after construction as we do in the example below.
82+
# Note that the bounding box looking bigger in the images with fewer pixels is
83+
# an artifact, not reality. That is merely the rasterized representation of the
84+
# bounding box's boundaries appearing bigger because we specify a fixed width of
85+
# that rasterized line. When the image is, say, only 30 pixels wide, a
86+
# line that is 3 pixels wide is relatively large.
12587
#
126-
# There are two values for `clamping_mode`:
88+
# .. _clamping_mode_tuto:
12789
#
128-
# - `"soft"`: The default when constucting
129-
# :class:`~torchvision.tv_tensors.BoundingBoxes`. <Insert semantic
130-
# description for soft mode.>
131-
# - `"hard"`: <Insert semantic description for hard mode.>
90+
# Clamping Mode, and its effect on transforms
91+
# -------------------------------------------
13292
#
133-
# For standard bounding boxes, both modes behave the same. We also need to
134-
# document:
93+
# Some transforms, such as :class:`~torchvision.transforms.CenterCrop`, may
94+
# result in having the transformed bounding box partially outside of the
95+
# transformed (cropped) image. In general, this may happen on most of the
96+
# :ref:`geometric transforms <v2_api_ref>`.
13597
#
136-
# - `clamping_mode` for individual kernels.
137-
# - `clamping_mode` in :class:`~torchvision.transforms.v2.ClampBoundingBoxes`.
138-
# - the new :class:`~torchvision.transforms.v2.SetClampingMode` transform.
98+
# In such cases, the bounding box is clamped to the transformed image size based
99+
# on its ``clamping_mode`` attribute. There are three values for
100+
# ``clamping_mode``, which determines how the box is clamped after a
101+
# transformation:
139102
#
103+
# - ``None``: No clamping is applied, and the bounding box may be partially
104+
# outside of the image.
105+
# - ``"hard"``: The box is clamped to the image size, such that all its corners
106+
# are within the image canvas. This potentially results in a loss of
107+
# information, and it can lead to unintuitive results. But it may be necessary
108+
# for some applications e.g. if the model doesn't support boxes outside of
109+
# their image.
110+
# - ``"soft"``: This is an intermediate mode between ``None`` and "hard": the
111+
# box is clamped, but not as strictly as in "hard" mode. Some box dimensions
112+
# may still be outside of the image. This is the default when constructing
113+
# :class:`~torchvision.tv_tensors.BoundingBoxes`.
114+
#
115+
# .. note::
116+
#
117+
# For axis-aligned bounding boxes, the ``"soft"`` and ``"hard"`` modes behave
118+
# the same, as the bounding box is always clamped to the image size.
119+
#
120+
# Let's illustrate the clamping modes with
121+
# the :class:`~torchvision.transforms.CenterCrop` transform:
122+
140123
assert orig_box.clamping_mode == "soft"
141-
hard_box = orig_box.clone()
142-
hard_box.clamping_mode = "hard"
143124

125+
box_hard_clamping = BoundingBoxes(orig_box, format=orig_box.format, canvas_size=orig_box.canvas_size, clamping_mode="hard")
126+
127+
box_no_clamping = BoundingBoxes(orig_box, format=orig_box.format, canvas_size=orig_box.canvas_size, clamping_mode=None)
128+
129+
crop_sizes = (800, 1200, 2000, orig_img.size)
144130
soft_center_crops_and_boxes = [
145131
v2.CenterCrop(size=size)(orig_img, orig_box)
146-
for size in (800, 1200, 2000, orig_img.size)
132+
for size in crop_sizes
147133
]
148134

149135
hard_center_crops_and_boxes = [
150-
v2.CenterCrop(size=size)(orig_img, hard_box)
151-
for size in (800, 1200, 2000, orig_img.size)
136+
v2.CenterCrop(size=size)(orig_img, box_hard_clamping)
137+
for size in crop_sizes
138+
]
139+
140+
no_clamping_center_crops_and_boxes = [
141+
v2.CenterCrop(size=size)(orig_img, box_no_clamping)
142+
for size in crop_sizes
152143
]
153144

154-
plot([[(orig_img, orig_box)] + soft_center_crops_and_boxes,
155-
[(orig_img, hard_box)] + hard_center_crops_and_boxes],
145+
plot([[(orig_img, box_hard_clamping)] + hard_center_crops_and_boxes,
146+
[(orig_img, orig_box)] + soft_center_crops_and_boxes,
147+
[(orig_img, box_no_clamping)] + no_clamping_center_crops_and_boxes],
156148
bbox_width=10)
149+
150+
# %%
151+
# The plot above shows the "hard" clamping mode, "soft" and ``None``, in this
152+
# order. While "soft" and ``None`` result in similar plots, they do not lead to
153+
# the exact same clamped boxes. The non-clamped boxes will show dimensions that are further away from the image:
154+
print("boxes with soft clamping:")
155+
print(soft_center_crops_and_boxes)
156+
print("boxes with no clamping:")
157+
print(no_clamping_center_crops_and_boxes)
158+
# %%
159+
#
160+
# Setting the clamping mode
161+
# --------------------------
162+
163+
# The ``clamping_mode`` attribute, which determines the clamping strategy that
164+
# is applied to a box, can be set in different ways:
165+
#
166+
# - When constructing the bounding box with its
167+
# :class:`~torchvision.tv_tensors.BoundingBoxes` constructor, as done in the example above.
168+
# - By directly setting the attribute on an existing instance, e.g. ``boxes.clamping_mode = "hard"``.
169+
# - By calling the :class:`~torchvision.transforms.v2.SetClampingMode` transform.
170+
#
171+
# Also, remember that you can always clamp the bounding box manually by
172+
# calling the :class:`~torchvision.transforms.v2.ClampBoundingBoxes` transform!
173+
# Here's an example illustrating all of these options:
174+
175+
t = v2.Compose([
176+
v2.CenterCrop(size=(800,)), # clamps according to the current clamping_mode
177+
# attribute, in this case set by the constructor
178+
v2.SetClampingMode(None), # sets the clamping_mode attribute for future transforms
179+
v2.Pad(padding=3), # clamps according to the current clamping_mode
180+
# i.e. ``None``
181+
v2.ClampBoundingBoxes(clamping_mode="soft"), # clamps with "soft" mode.
182+
])
183+
184+
out_img, out_box = t(orig_img, orig_box)
185+
plot([(orig_img, orig_box), (out_img, out_box)], bbox_width=10)
186+
187+
# %%

torchvision/transforms/v2/_meta.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,10 @@ def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> t
2727
class ClampBoundingBoxes(Transform):
2828
"""Clamp bounding boxes to their corresponding image dimensions.
2929
30-
The clamping is done according to the bounding boxes' ``canvas_size`` meta-data.
31-
3230
Args:
33-
clamping_mode: TODOBB more docs. Default is None which relies on the input box' clamping_mode attribute.
34-
31+
clamping_mode: Default is "auto" which relies on the input box's
32+
``clamping_mode`` attribute. Read more in :ref:`clamping_mode_tuto`
33+
for more details on how to use this transform.
3534
"""
3635

3736
def __init__(self, clamping_mode: Union[CLAMPING_MODE_TYPE, str] = "auto") -> None:
@@ -57,7 +56,15 @@ def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_te
5756

5857

5958
class SetClampingMode(Transform):
60-
"""TODOBB"""
59+
"""Sets the ``clamping_mode`` attribute of the bounding boxes for future transforms.
60+
61+
62+
63+
Args:
64+
clamping_mode: The clamping mode to set. Possible values are: "soft",
65+
"hard", or ``None``. Read more in :ref:`clamping_mode_tuto` for more
66+
details on how to use this transform.
67+
"""
6168

6269
def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None:
6370
super().__init__()

torchvision/tv_tensors/_bounding_boxes.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ def is_rotated_bounding_format(format: BoundingBoxFormat | str) -> bool:
5959
# This should ideally be a Literal, but torchscript fails.
6060
CLAMPING_MODE_TYPE = Optional[str]
6161

62-
# TODOBB All docs. Add any new API to rst files, add tutorial[s].
63-
6462

6563
class BoundingBoxes(TVTensor):
6664
""":class:`torch.Tensor` subclass for bounding boxes with shape ``[N, K]``.
@@ -78,7 +76,8 @@ class BoundingBoxes(TVTensor):
7876
data: Any data that can be turned into a tensor with :func:`torch.as_tensor`.
7977
format (BoundingBoxFormat, str): Format of the bounding box.
8078
canvas_size (two-tuple of ints): Height and width of the corresponding image or video.
81-
clamping_mode: TODOBB
79+
clamping_mode: The clamping mode to use when applying transforms that may result in bounding boxes
80+
partially outside of the image. Possible values are: "soft", "hard", or ``None``. Read more in :ref:`clamping_mode_tuto`.
8281
dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from
8382
``data``.
8483
device (torch.device, optional): Desired device of the bounding box. If omitted and ``data`` is a

0 commit comments

Comments
 (0)