Commit d1b5e83

Merge branch 'main' into please_dont_modify_this_branch_unless_you_are_just_merging_with_main__
2 parents: 8d99ea8 + 89f5855

37 files changed: +2900 −327 lines

.github/workflows/build-cmake.yml

Lines changed: 2 additions & 2 deletions
```diff
@@ -18,7 +18,7 @@ jobs:
           gpu-arch-type: cpu
         - runner: linux.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
   permissions:
@@ -66,7 +66,7 @@ jobs:
           gpu-arch-type: cpu
         - runner: windows.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
   with:
```

.github/workflows/prototype-tests-linux-gpu.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -21,7 +21,7 @@ jobs:
         - python-version: "3.9"
           runner: linux.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
   permissions:
```

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -24,7 +24,7 @@ jobs:
         - python-version: 3.9
           runner: linux.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
   permissions:
```

CONTRIBUTING.md

Lines changed: 3 additions & 3 deletions
````diff
@@ -38,13 +38,13 @@ instructions](https://pytorch.org/get-started/locally/). Note that the official
 instructions may ask you to install torchvision itself. If you are doing development
 on torchvision, you should not install prebuilt torchvision packages.
 
-**Optionally**, install `libpng` and `libjpeg-turbo` if you want to enable
+**Optionally**, install `libpng`, `libjpeg-turbo` and `libwebp` if you want to enable
 support for
-native encoding / decoding of PNG and JPEG formats in
+native encoding / decoding of PNG, JPEG and WebP formats in
 [torchvision.io](https://pytorch.org/vision/stable/io.html#image):
 
 ```bash
-conda install libpng libjpeg-turbo -c pytorch
+conda install libpng libjpeg-turbo libwebp -c pytorch
 ```
 
 Note: you can use the `TORCHVISION_INCLUDE` and `TORCHVISION_LIBRARY`
````
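The trailing context line is cut off in this view; those environment variables presumably point the build at headers and libraries in non-standard locations. A minimal sketch of that workflow (the paths below are hypothetical, not from this commit):

```bash
# Hypothetical paths -- not from this commit: point the torchvision build
# at a custom libwebp (or libpng / libjpeg-turbo) install location.
export TORCHVISION_INCLUDE=/opt/libwebp/include
export TORCHVISION_LIBRARY=/opt/libwebp/lib
python setup.py develop  # dev install, as described elsewhere in CONTRIBUTING.md
```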

docs/source/transforms.rst

Lines changed: 3 additions & 1 deletion
```diff
@@ -101,7 +101,7 @@ range of the inputs.
 V1 or V2? Which one should I use?
 ---------------------------------
 
-**TL;DR** We recommending using the ``torchvision.transforms.v2`` transforms
+**TL;DR** We recommend using the ``torchvision.transforms.v2`` transforms
 instead of those in ``torchvision.transforms``. They're faster and they can do
 more things. Just change the import and you should be good to go. Moving
 forward, new features and improvements will only be considered for the v2
@@ -408,6 +408,7 @@ Miscellaneous
     v2.Lambda
     v2.SanitizeBoundingBoxes
     v2.ClampBoundingBoxes
+    v2.ClampKeyPoints
     v2.UniformTemporalSubsample
     v2.JPEG
 
@@ -421,6 +422,7 @@ Functionals
     v2.functional.erase
     v2.functional.sanitize_bounding_boxes
     v2.functional.clamp_bounding_boxes
+    v2.functional.clamp_keypoints
     v2.functional.uniform_temporal_subsample
     v2.functional.jpeg
 
```
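The "just change the import" advice in the touched paragraph looks like this in practice (a minimal sketch; the pipeline contents are illustrative, not from this commit):

```python
import torch

# Before: from torchvision import transforms
# After: the v2 namespace is intended as a drop-in replacement
import torchvision.transforms.v2 as transforms

pipeline = transforms.Compose([
    transforms.RandomResizedCrop(size=(224, 224), antialias=True),
    transforms.ToDtype(torch.float32, scale=True),  # v2-only convenience
])
```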

docs/source/tv_tensors.rst

Lines changed: 1 addition & 0 deletions
```diff
@@ -21,6 +21,7 @@ info.
 
     Image
     Video
+    KeyPoints
     BoundingBoxFormat
     BoundingBoxes
     Mask
```

gallery/transforms/plot_tv_tensors.py

Lines changed: 10 additions & 1 deletion
```diff
@@ -46,11 +46,12 @@
 # Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
 # for the input data.
 #
-# :mod:`torchvision.tv_tensors` supports four types of TVTensors:
+# :mod:`torchvision.tv_tensors` supports five types of TVTensors:
 #
 # * :class:`~torchvision.tv_tensors.Image`
 # * :class:`~torchvision.tv_tensors.Video`
 # * :class:`~torchvision.tv_tensors.BoundingBoxes`
+# * :class:`~torchvision.tv_tensors.KeyPoints`
 # * :class:`~torchvision.tv_tensors.Mask`
 #
 # What can I do with a TVTensor?
@@ -96,6 +97,7 @@
 # :class:`~torchvision.tv_tensors.BoundingBoxes` requires the coordinate format as well as the size of the
 # corresponding image (``canvas_size``) alongside the actual values. These
 # metadata are required to properly transform the bounding boxes.
+# In a similar fashion, :class:`~torchvision.tv_tensors.KeyPoints` also require the ``canvas_size`` metadata to be added.
 
 bboxes = tv_tensors.BoundingBoxes(
     [[17, 16, 344, 495], [0, 10, 0, 10]],
@@ -104,6 +106,13 @@
 )
 print(bboxes)
 
+
+keypoints = tv_tensors.KeyPoints(
+    [[17, 16], [344, 495], [0, 10], [0, 10]],
+    canvas_size=image.shape[-2:]
+)
+print(keypoints)
+
 # %%
 # Using ``tv_tensors.wrap()``
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
```
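Why the ``canvas_size`` metadata matters becomes visible once a geometric transform runs. A minimal sketch (not part of this commit, and assuming KeyPoints dispatch in the v2 transforms landed with this feature):

```python
import torch
from torchvision import tv_tensors
from torchvision.transforms import v2

keypoints = tv_tensors.KeyPoints([[10, 20], [30, 40]], canvas_size=(100, 100))

# Mirroring the x coordinates requires the canvas width, which the
# transform reads from the KeyPoints metadata rather than from arguments.
flipped = v2.RandomHorizontalFlip(p=1.0)(keypoints)
print(flipped)
```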

references/detection/coco_utils.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -123,7 +123,7 @@ def convert_to_coco_api(ds):
     coco_ds = COCO()
     # annotation IDs need to start at 1, not 0, see torchvision issue #1530
     ann_id = 1
-    dataset = {"images": [], "categories": [], "annotations": []}
+    dataset = {"images": [], "categories": [], "annotations": [], "info": {}}
     categories = set()
     for img_idx in range(len(ds)):
         # find better way to get target
```
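For context (not shown in this hunk): ``convert_to_coco_api`` later attaches this dict to the ``COCO`` object and builds the index from it, roughly as sketched below. The motivation for the empty ``"info"`` entry isn't stated in the commit; presumably it guards consumers that read ``dataset["info"]``.

```python
from pycocotools.coco import COCO

# In-memory COCO dataset in the same shape as the dict above; the empty
# "info" entry keeps code that reads dataset["info"] from raising KeyError.
dataset = {"images": [], "categories": [], "annotations": [], "info": {}}

coco_ds = COCO()           # construct without an annotation file
coco_ds.dataset = dataset  # attach the dict directly...
coco_ds.createIndex()      # ...and build image/annotation/category indices
```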

test/common_utils.py

Lines changed: 37 additions & 13 deletions
```diff
@@ -21,7 +21,7 @@
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
 from torchvision import io, tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from torchvision.transforms.v2.functional import to_image, to_pil_image
+from torchvision.transforms.v2.functional import clamp_bounding_boxes, to_image, to_pil_image
 
 
 IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
@@ -400,6 +400,12 @@ def make_image_pil(*args, **kwargs):
     return to_pil_image(make_image(*args, **kwargs))
 
 
+def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"):
+    y = torch.randint(0, canvas_size[0], size=(num_points, 1), dtype=dtype, device=device)
+    x = torch.randint(0, canvas_size[1], size=(num_points, 1), dtype=dtype, device=device)
+    return tv_tensors.KeyPoints(torch.cat((x, y), dim=-1), canvas_size=canvas_size)
+
+
 def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
@@ -417,6 +423,13 @@ def sample_position(values, max_value):
     format = tv_tensors.BoundingBoxFormat[format]
 
     dtype = dtype or torch.float32
+    int_dtype = dtype in (
+        torch.uint8,
+        torch.int8,
+        torch.int16,
+        torch.int32,
+        torch.int64,
+    )
 
     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -443,20 +456,31 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1, y1 = x, y
-        x3 = x1 + w * cos
-        y3 = y1 - w * sin
-        x2 = x3 + h * sin
-        y2 = y3 + h * cos
-        x4 = x1 + h * sin
-        y4 = y1 + h * cos
-        parts = (x1, y1, x3, y3, x2, y2, x4, y4)
+        x1 = torch.round(x) if int_dtype else x
+        y1 = torch.round(y) if int_dtype else y
+        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
+        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
+        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
+        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
+        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
+        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
-
-    return tv_tensors.BoundingBoxes(
-        torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
-    )
+    out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device)
+    if tv_tensors.is_rotated_bounding_format(format):
+        # The rotated bounding boxes are not guaranteed to be within the canvas by design,
+        # so we apply clamping. We also add a 2 buffer to the canvas size to avoid
+        # numerical issues during the testing
+        buffer = 4
+        out_boxes = clamp_bounding_boxes(
+            out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer)
+        )
+        if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR:
+            out_boxes[:, :2] += buffer // 2
+        elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
+            out_boxes[:, :] += buffer // 2
+    return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size)
 
 
 def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"):
```
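A quick usage sketch for the new helper (not part of the commit; it assumes ``make_keypoints`` from the hunk above is in scope and that ``KeyPoints`` exposes ``canvas_size`` like the other TVTensors):

```python
import torch
from torchvision import tv_tensors

# Random keypoints on a 32x32 canvas, via the helper added above.
keypoints = make_keypoints(canvas_size=(32, 32), num_points=4)

assert isinstance(keypoints, tv_tensors.KeyPoints)
assert keypoints.shape == (4, 2)          # num_points rows of (x, y)
assert keypoints.canvas_size == (32, 32)  # metadata carried for transforms
```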
