Commit d1b5e83

Merge branch 'main' into please_dont_modify_this_branch_unless_you_are_just_merging_with_main__
2 parents: 8d99ea8 + 89f5855

37 files changed: +2900 −327 lines

.github/workflows/build-cmake.yml

Lines changed: 2 additions & 2 deletions
```diff
@@ -18,7 +18,7 @@ jobs:
           gpu-arch-type: cpu
         - runner: linux.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
   permissions:
@@ -66,7 +66,7 @@ jobs:
           gpu-arch-type: cpu
         - runner: windows.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
   with:
```

.github/workflows/prototype-tests-linux-gpu.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -21,7 +21,7 @@ jobs:
         - python-version: "3.9"
           runner: linux.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
   permissions:
```

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -24,7 +24,7 @@ jobs:
         - python-version: 3.9
           runner: linux.g5.4xlarge.nvidia.gpu
           gpu-arch-type: cuda
-          gpu-arch-version: "11.8"
+          gpu-arch-version: "12.6"
     fail-fast: false
   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
   permissions:
```

CONTRIBUTING.md

Lines changed: 3 additions & 3 deletions
````diff
@@ -38,13 +38,13 @@ instructions](https://pytorch.org/get-started/locally/). Note that the official
 instructions may ask you to install torchvision itself. If you are doing development
 on torchvision, you should not install prebuilt torchvision packages.
 
-**Optionally**, install `libpng` and `libjpeg-turbo` if you want to enable
+**Optionally**, install `libpng`, `libjpeg-turbo` and `libwebp` if you want to enable
 support for
-native encoding / decoding of PNG and JPEG formats in
+native encoding / decoding of PNG, JPEG and WebP formats in
 [torchvision.io](https://pytorch.org/vision/stable/io.html#image):
 
 ```bash
-conda install libpng libjpeg-turbo -c pytorch
+conda install libpng libjpeg-turbo libwebp -c pytorch
 ```
 
 Note: you can use the `TORCHVISION_INCLUDE` and `TORCHVISION_LIBRARY`
````
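The trailing context line is cut off in this view; those environment variables presumably point the build at headers and libraries in non-standard locations. A minimal sketch of that workflow (the paths below are hypothetical, not from this commit):

```bash
# Hypothetical paths -- not from this commit: point the torchvision build
# at a custom libwebp (or libpng / libjpeg-turbo) install location.
export TORCHVISION_INCLUDE=/opt/libwebp/include
export TORCHVISION_LIBRARY=/opt/libwebp/lib
python setup.py develop  # dev install, as described elsewhere in CONTRIBUTING.md
```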

docs/source/transforms.rst

Lines changed: 3 additions & 1 deletion
```diff
@@ -101,7 +101,7 @@ range of the inputs.
 V1 or V2? Which one should I use?
 ---------------------------------
 
-**TL;DR** We recommending using the ``torchvision.transforms.v2`` transforms
+**TL;DR** We recommend using the ``torchvision.transforms.v2`` transforms
 instead of those in ``torchvision.transforms``. They're faster and they can do
 more things. Just change the import and you should be good to go. Moving
 forward, new features and improvements will only be considered for the v2
@@ -408,6 +408,7 @@ Miscellaneous
     v2.Lambda
     v2.SanitizeBoundingBoxes
     v2.ClampBoundingBoxes
+    v2.ClampKeyPoints
     v2.UniformTemporalSubsample
     v2.JPEG
 
@@ -421,6 +422,7 @@ Functionals
     v2.functional.erase
     v2.functional.sanitize_bounding_boxes
     v2.functional.clamp_bounding_boxes
+    v2.functional.clamp_keypoints
     v2.functional.uniform_temporal_subsample
     v2.functional.jpeg
 
```
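The "just change the import" advice in the touched paragraph looks like this in practice (a minimal sketch; the pipeline contents are illustrative, not from this commit):

```python
import torch

# Before: from torchvision import transforms
# After: the v2 namespace is intended as a drop-in replacement
import torchvision.transforms.v2 as transforms

pipeline = transforms.Compose([
    transforms.RandomResizedCrop(size=(224, 224), antialias=True),
    transforms.ToDtype(torch.float32, scale=True),  # v2-only convenience
])
```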

docs/source/tv_tensors.rst

Lines changed: 1 addition & 0 deletions
```diff
@@ -21,6 +21,7 @@ info.
 
     Image
     Video
+    KeyPoints
     BoundingBoxFormat
     BoundingBoxes
     Mask
```

gallery/transforms/plot_tv_tensors.py

Lines changed: 10 additions & 1 deletion
```diff
@@ -46,11 +46,12 @@
 # Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
 # for the input data.
 #
-# :mod:`torchvision.tv_tensors` supports four types of TVTensors:
+# :mod:`torchvision.tv_tensors` supports five types of TVTensors:
 #
 # * :class:`~torchvision.tv_tensors.Image`
 # * :class:`~torchvision.tv_tensors.Video`
 # * :class:`~torchvision.tv_tensors.BoundingBoxes`
+# * :class:`~torchvision.tv_tensors.KeyPoints`
 # * :class:`~torchvision.tv_tensors.Mask`
 #
 # What can I do with a TVTensor?
@@ -96,6 +97,7 @@
 # :class:`~torchvision.tv_tensors.BoundingBoxes` requires the coordinate format as well as the size of the
 # corresponding image (``canvas_size``) alongside the actual values. These
 # metadata are required to properly transform the bounding boxes.
+# In a similar fashion, :class:`~torchvision.tv_tensors.KeyPoints` also require the ``canvas_size`` metadata to be added.
 
 bboxes = tv_tensors.BoundingBoxes(
     [[17, 16, 344, 495], [0, 10, 0, 10]],
@@ -104,6 +106,13 @@
 )
 print(bboxes)
 
+
+keypoints = tv_tensors.KeyPoints(
+    [[17, 16], [344, 495], [0, 10], [0, 10]],
+    canvas_size=image.shape[-2:]
+)
+print(keypoints)
+
 # %%
 # Using ``tv_tensors.wrap()``
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
```
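Why the ``canvas_size`` metadata matters becomes visible once a geometric transform runs. A minimal sketch (not part of this commit, and assuming KeyPoints dispatch in the v2 transforms landed with this feature):

```python
import torch
from torchvision import tv_tensors
from torchvision.transforms import v2

keypoints = tv_tensors.KeyPoints([[10, 20], [30, 40]], canvas_size=(100, 100))

# Mirroring the x coordinates requires the canvas width, which the
# transform reads from the KeyPoints metadata rather than from arguments.
flipped = v2.RandomHorizontalFlip(p=1.0)(keypoints)
print(flipped)
```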

references/detection/coco_utils.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -123,7 +123,7 @@ def convert_to_coco_api(ds):
     coco_ds = COCO()
     # annotation IDs need to start at 1, not 0, see torchvision issue #1530
     ann_id = 1
-    dataset = {"images": [], "categories": [], "annotations": []}
+    dataset = {"images": [], "categories": [], "annotations": [], "info": {}}
     categories = set()
     for img_idx in range(len(ds)):
         # find better way to get target
```
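For context (not shown in this hunk): ``convert_to_coco_api`` later attaches this dict to the ``COCO`` object and builds the index from it, roughly as sketched below. The motivation for the empty ``"info"`` entry isn't stated in the commit; presumably it guards consumers that read ``dataset["info"]``.

```python
from pycocotools.coco import COCO

# In-memory COCO dataset in the same shape as the dict above; the empty
# "info" entry keeps code that reads dataset["info"] from raising KeyError.
dataset = {"images": [], "categories": [], "annotations": [], "info": {}}

coco_ds = COCO()           # construct without an annotation file
coco_ds.dataset = dataset  # attach the dict directly...
coco_ds.createIndex()      # ...and build image/annotation/category indices
```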

test/common_utils.py

Lines changed: 37 additions & 13 deletions
```diff
@@ -21,7 +21,7 @@
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
 from torchvision import io, tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from torchvision.transforms.v2.functional import to_image, to_pil_image
+from torchvision.transforms.v2.functional import clamp_bounding_boxes, to_image, to_pil_image
 
 
 IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
@@ -400,6 +400,12 @@ def make_image_pil(*args, **kwargs):
     return to_pil_image(make_image(*args, **kwargs))
 
 
+def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"):
+    y = torch.randint(0, canvas_size[0], size=(num_points, 1), dtype=dtype, device=device)
+    x = torch.randint(0, canvas_size[1], size=(num_points, 1), dtype=dtype, device=device)
+    return tv_tensors.KeyPoints(torch.cat((x, y), dim=-1), canvas_size=canvas_size)
+
+
 def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
@@ -417,6 +423,13 @@ def sample_position(values, max_value):
     format = tv_tensors.BoundingBoxFormat[format]
 
     dtype = dtype or torch.float32
+    int_dtype = dtype in (
+        torch.uint8,
+        torch.int8,
+        torch.int16,
+        torch.int32,
+        torch.int64,
+    )
 
     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -443,20 +456,31 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1, y1 = x, y
-        x3 = x1 + w * cos
-        y3 = y1 - w * sin
-        x2 = x3 + h * sin
-        y2 = y3 + h * cos
-        x4 = x1 + h * sin
-        y4 = y1 + h * cos
-        parts = (x1, y1, x3, y3, x2, y2, x4, y4)
+        x1 = torch.round(x) if int_dtype else x
+        y1 = torch.round(y) if int_dtype else y
+        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
+        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
+        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
+        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
+        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
+        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
-
-    return tv_tensors.BoundingBoxes(
-        torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
-    )
+    out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device)
+    if tv_tensors.is_rotated_bounding_format(format):
+        # The rotated bounding boxes are not guaranteed to be within the canvas by design,
+        # so we apply clamping. We also add a 2 buffer to the canvas size to avoid
+        # numerical issues during the testing
+        buffer = 4
+        out_boxes = clamp_bounding_boxes(
+            out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer)
+        )
+        if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR:
+            out_boxes[:, :2] += buffer // 2
+        elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
+            out_boxes[:, :] += buffer // 2
+    return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size)
 
 
 def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"):
```
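A quick usage sketch for the new helper (not part of the commit; it assumes ``make_keypoints`` from the hunk above is in scope and that ``KeyPoints`` exposes ``canvas_size`` like the other TVTensors):

```python
import torch
from torchvision import tv_tensors

# Random keypoints on a 32x32 canvas, via the helper added above.
keypoints = make_keypoints(canvas_size=(32, 32), num_points=4)

assert isinstance(keypoints, tv_tensors.KeyPoints)
assert keypoints.shape == (4, 2)          # num_points rows of (x, y)
assert keypoints.canvas_size == (32, 32)  # metadata carried for transforms
```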
