Skeleton for rotated bounding box tutorial (pytorch#9140)

scotts · AntoineSimoulin · commit 2bd6f415e0a0 · 2025-07-09T14:03:57.000-07:00
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -87,6 +87,7 @@ def __init__(self, src_dir):
         "plot_transforms_illustrations.py",
         "plot_transforms_e2e.py",
         "plot_cutmix_mixup.py",
+        "plot_rotated_box_transforms.py",
         "plot_custom_transforms.py",
         "plot_tv_tensors.py",
         "plot_custom_tv_tensors.py",
diff --git a/gallery/assets/leaning_tower.jpg b/gallery/assets/leaning_tower.jpg
diff --git a/gallery/transforms/helpers.py b/gallery/transforms/helpers.py
@@ -2,10 +2,11 @@
 import torch
 from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
 from torchvision import tv_tensors
+from torchvision.transforms import v2
 from torchvision.transforms.v2 import functional as F
 
 
-def plot(imgs, row_title=None, **imshow_kwargs):
+def plot(imgs, row_title=None, bbox_width=3, **imshow_kwargs):
     if not isinstance(imgs[0], list):
         # Make a 2d grid even if there's just 1 row
         imgs = [imgs]
@@ -24,6 +25,11 @@ def plot(imgs, row_title=None, **imshow_kwargs):
                     masks = target.get("masks")
                 elif isinstance(target, tv_tensors.BoundingBoxes):
                     boxes = target
+
+                    # Conversion necessary because draw_bounding_boxes() only
+                    # works with this specific format.
+                    if tv_tensors.is_rotated_bounding_format(boxes.format):
+                        boxes = v2.ConvertBoundingBoxFormat("xyxyxyxy")(boxes)
                 else:
                     raise ValueError(f"Unexpected target type: {type(target)}")
             img = F.to_image(img)
@@ -35,7 +41,7 @@ def plot(imgs, row_title=None, **imshow_kwargs):
 
             img = F.to_dtype(img, torch.uint8, scale=True)
             if boxes is not None:
-                img = draw_bounding_boxes(img, boxes, colors="yellow", width=3)
+                img = draw_bounding_boxes(img, boxes, colors="yellow", width=bbox_width)
             if masks is not None:
                 img = draw_segmentation_masks(img, masks.to(torch.bool), colors=["green"] * masks.shape[0], alpha=.65)
 
diff --git a/gallery/transforms/plot_rotated_box_transforms.py b/gallery/transforms/plot_rotated_box_transforms.py
@@ -0,0 +1,156 @@
+"""
+===============================================================
+Transforms on Rotated Bounding Boxes
+===============================================================
+
+This example illustrates how to define and use rotated bounding boxes. We'll
+cover how to define them, demonstrate their usage with some of the existing
+transforms, and finally some of their unique behavior in comparison to
+standard bounding boxes.
+
+First, a bit of setup code:
+"""
+
+# %%
+from PIL import Image
+from pathlib import Path
+import matplotlib.pyplot as plt
+
+
+import torch
+from torchvision import tv_tensors
+from torchvision.transforms import v2
+from helpers import plot
+
+plt.rcParams["figure.figsize"] = [10, 5]
+plt.rcParams["savefig.bbox"] = "tight"
+
+# if you change the seed, make sure that the randomly-applied transforms
+# properly show that the image can be both transformed and *not* transformed!
+torch.manual_seed(0)
+
+# If you're trying to run that on Colab, you can download the assets and the
+# helpers from https://github.com/pytorch/vision/tree/main/gallery/
+orig_img = Image.open(Path('../assets') / 'leaning_tower.jpg')
+
+# %%
+# Creating a Rotated Bounding Box
+# -------------------------------
+# Rotated bounding boxes are created by instantiating the
+# :class:`~torchvision.tv_tensors.BoundingBoxes` class. It's the `format`
+# parameter of the constructor that determines if a bounding box is rotated or
+# not. In this instance, we use the
+# :attr:`~torchvision.tv_tensors.BoundingBoxFormat` kind `CXCYWHR`. The first
+# two values are the `x` and `y` coordinates of the center of the bounding box.
+# The next two values are the `width` and `height` of the bounding box, and the
+# last value is the `rotation` of the bounding box.
+
+
+orig_box = tv_tensors.BoundingBoxes(
+    [
+        [860.0, 1100, 570, 1840, -7],
+    ],
+    format="CXCYWHR",
+    canvas_size=(orig_img.size[1], orig_img.size[0]),
+)
+
+plot([(orig_img, orig_box)], bbox_width=10)
+
+# %%
+# Rotation
+# --------
+# Rotated bounding boxes maintain their rotation with respect to the image even
+# when the image itself is rotated through the
+# :class:`~torchvision.transforms.v2.RandomRotation` transform.
+rotater = v2.RandomRotation(degrees=(0, 180), expand=True)
+rotated_imgs = [rotater((orig_img, orig_box)) for _ in range(4)]
+plot([(orig_img, orig_box)] + rotated_imgs, bbox_width=10)
+
+# %%
+# Padding
+# -------
+# Rotated bounding boxes also maintain their properties when the image is padded using
+# :class:`~torchvision.transforms.v2.Pad`.
+padded_imgs_and_boxes = [
+    v2.Pad(padding=padding)(orig_img, orig_box)
+    for padding in (30, 50, 100, 200)
+]
+plot([(orig_img, orig_box)] + padded_imgs_and_boxes, bbox_width=10)
+
+# %%
+# Resizing
+# --------
+# Rotated bounding boxes are also resized along with an image in the
+# :class:`~torchvision.transforms.v2.Resize` transform.
+#
+# Note that the bounding box looking bigger in the images with fewer pixels is
+# an artifact, not reality. That is merely the rasterized representation of the
+# bounding box's boundaries appearing bigger because we specify a fixed width of
+# that rasterized line. When the image is, say, only 30 pixels wide, a
+# line that is 3 pixels wide is relatively large.
+resized_imgs = [
+    v2.Resize(size=size)(orig_img, orig_box)
+    for size in (30, 50, 100, orig_img.size)
+]
+plot([(orig_img, orig_box)] + resized_imgs, bbox_width=5)
+
+# %%
+# Perspective
+# -----------
+# The rotated bounding box is also transformed along with the image when the
+# perspective is transformed with :class:`~torchvision.transforms.v2.RandomPerspective`.
+perspective_transformer = v2.RandomPerspective(distortion_scale=0.6, p=1.0)
+perspective_imgs = [perspective_transformer(orig_img, orig_box) for _ in range(4)]
+plot([(orig_img, orig_box)] + perspective_imgs, bbox_width=10)
+
+# %%
+# Elastic Transform
+# -----------------
+# The rotated bounding box is appropriately unchanged when going through the
+# :class:`~torchvision.transforms.v2.ElasticTransform`.
+elastic_imgs = [
+    v2.ElasticTransform(alpha=alpha)(orig_img, orig_box)
+    for alpha in (100.0, 500.0, 1000.0, 2000.0)
+]
+plot([(orig_img, orig_box)] + elastic_imgs, bbox_width=10)
+
+# %%
+# Crop & Clamping Modes
+# ---------------------
+# The :class:`~torchvision.transforms.v2.CenterCrop` transform selectively crops
+# the image on a center location. The behavior of the rotated bounding box
+# depends on its `clamping_mode`. We can set the `clamping_mode` in the
+# :class:`~torchvision.tv_tensors.BoundingBoxes` constructor, or by directly
+# setting it after construction as we do in the example below.
+#
+# There are two values for `clamping_mode`:
+#
+#  - `"soft"`: The default when constructing
+#    :class:`~torchvision.tv_tensors.BoundingBoxes`. <Insert semantic
+#    description for soft mode.>
+#  - `"hard"`: <Insert semantic description for hard mode.>
+#
+# For standard bounding boxes, both modes behave the same. We also need to
+# document:
+#
+#  - `clamping_mode` for individual kernels.
+#  - `clamping_mode` in :class:`~torchvision.transforms.v2.ClampBoundingBoxes`.
+#  - the new :class:`~torchvision.transforms.v2.SetClampingMode` transform.
+#
+assert orig_box.clamping_mode == "soft"
+hard_box = orig_box.clone()
+hard_box.clamping_mode = "hard"
+
+soft_center_crops_and_boxes = [
+    v2.CenterCrop(size=size)(orig_img, orig_box)
+    for size in (800, 1200, 2000, orig_img.size)
+]
+
+hard_center_crops_and_boxes = [
+    v2.CenterCrop(size=size)(orig_img, hard_box)
+    for size in (800, 1200, 2000, orig_img.size)
+]
+
+plot([[(orig_img, orig_box)] + soft_center_crops_and_boxes,
+      [(orig_img, hard_box)] + hard_center_crops_and_boxes],
+     bbox_width=10)