Commit 2f26e5a

Merge branch 'main' of github.com:pytorch/vision into contribution_libwebp
2 parents b00d790 + 7b283c2

32 files changed: +2356 additions, -237 deletions

.github/workflows/build-cmake.yml

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@ jobs:
             gpu-arch-type: cpu
           - runner: linux.g5.4xlarge.nvidia.gpu
             gpu-arch-type: cuda
-            gpu-arch-version: "11.8"
+            gpu-arch-version: "12.6"
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
@@ -66,7 +66,7 @@ jobs:
             gpu-arch-type: cpu
           - runner: windows.g5.4xlarge.nvidia.gpu
             gpu-arch-type: cuda
-            gpu-arch-version: "11.8"
+            gpu-arch-version: "12.6"
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
     with:

.github/workflows/prototype-tests-linux-gpu.yml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ jobs:
           - python-version: "3.9"
             runner: linux.g5.4xlarge.nvidia.gpu
             gpu-arch-type: cuda
-            gpu-arch-version: "11.8"
+            gpu-arch-version: "12.6"
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ jobs:
           - python-version: 3.9
             runner: linux.g5.4xlarge.nvidia.gpu
             gpu-arch-type: cuda
-            gpu-arch-version: "11.8"
+            gpu-arch-version: "12.6"
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:

docs/source/transforms.rst

Lines changed: 3 additions & 1 deletion
@@ -101,7 +101,7 @@ range of the inputs.
 V1 or V2? Which one should I use?
 ---------------------------------
 
-**TL;DR** We recommending using the ``torchvision.transforms.v2`` transforms
+**TL;DR** We recommend using the ``torchvision.transforms.v2`` transforms
 instead of those in ``torchvision.transforms``. They're faster and they can do
 more things. Just change the import and you should be good to go. Moving
 forward, new features and improvements will only be considered for the v2
@@ -408,6 +408,7 @@ Miscellaneous
     v2.Lambda
     v2.SanitizeBoundingBoxes
     v2.ClampBoundingBoxes
+    v2.ClampKeyPoints
     v2.UniformTemporalSubsample
     v2.JPEG
 
@@ -421,6 +422,7 @@ Functionals
     v2.functional.erase
     v2.functional.sanitize_bounding_boxes
     v2.functional.clamp_bounding_boxes
+    v2.functional.clamp_keypoints
     v2.functional.uniform_temporal_subsample
     v2.functional.jpeg
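
The "just change the import" claim above is the whole migration story for most users. A minimal sketch of that swap, assuming a typical classification-style pipeline (the specific transforms below are illustrative, not part of this commit):

import torch
from torchvision.transforms import v2  # previously: import torchvision.transforms as T

# Same pipeline as with the v1 API; only the import changes.
transforms = v2.Compose([
    v2.RandomResizedCrop(size=(224, 224), antialias=True),
    v2.RandomHorizontalFlip(p=0.5),
    v2.ToDtype(torch.float32, scale=True),
])

img = torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8)
print(transforms(img).shape)  # torch.Size([3, 224, 224])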

docs/source/tv_tensors.rst

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ info.
 
     Image
     Video
+    KeyPoints
     BoundingBoxFormat
     BoundingBoxes
     Mask

gallery/transforms/plot_tv_tensors.py

Lines changed: 10 additions & 1 deletion
@@ -46,11 +46,12 @@
 # Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
 # for the input data.
 #
-# :mod:`torchvision.tv_tensors` supports four types of TVTensors:
+# :mod:`torchvision.tv_tensors` supports five types of TVTensors:
 #
 # * :class:`~torchvision.tv_tensors.Image`
 # * :class:`~torchvision.tv_tensors.Video`
 # * :class:`~torchvision.tv_tensors.BoundingBoxes`
+# * :class:`~torchvision.tv_tensors.KeyPoints`
 # * :class:`~torchvision.tv_tensors.Mask`
 #
 # What can I do with a TVTensor?
@@ -96,6 +97,7 @@
 # :class:`~torchvision.tv_tensors.BoundingBoxes` requires the coordinate format as well as the size of the
 # corresponding image (``canvas_size``) alongside the actual values. These
 # metadata are required to properly transform the bounding boxes.
+# In a similar fashion, :class:`~torchvision.tv_tensors.KeyPoints` also require the ``canvas_size`` metadata to be added.
 
 bboxes = tv_tensors.BoundingBoxes(
     [[17, 16, 344, 495], [0, 10, 0, 10]],
@@ -104,6 +106,13 @@
 )
 print(bboxes)
 
+
+keypoints = tv_tensors.KeyPoints(
+    [[17, 16], [344, 495], [0, 10], [0, 10]],
+    canvas_size=image.shape[-2:]
+)
+print(keypoints)
+
 # %%
 # Using ``tv_tensors.wrap()``
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
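
Like BoundingBoxes, the new KeyPoints class carries ``canvas_size`` so the v2 transforms can act on it. A hedged sketch of that interaction, assuming KeyPoints dispatches through v2 geometry transforms the same way BoundingBoxes does (coordinates and sizes below are made up for illustration):

from torchvision import tv_tensors
from torchvision.transforms import v2

# Keypoints on a 480x640 canvas, resized to half size; coordinates are (x, y).
keypoints = tv_tensors.KeyPoints([[17, 16], [344, 295]], canvas_size=(480, 640))
resized = v2.Resize(size=(240, 320))(keypoints)
print(resized)              # coordinates scaled by 0.5
print(resized.canvas_size)  # (240, 320)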

references/detection/coco_utils.py

Lines changed: 1 addition & 1 deletion
@@ -123,7 +123,7 @@ def convert_to_coco_api(ds):
     coco_ds = COCO()
     # annotation IDs need to start at 1, not 0, see torchvision issue #1530
     ann_id = 1
-    dataset = {"images": [], "categories": [], "annotations": []}
+    dataset = {"images": [], "categories": [], "annotations": [], "info": {}}
     categories = set()
     for img_idx in range(len(ds)):
         # find better way to get target
test/common_utils.py

Lines changed: 21 additions & 4 deletions
@@ -21,7 +21,7 @@
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
 from torchvision import io, tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from torchvision.transforms.v2.functional import to_image, to_pil_image
+from torchvision.transforms.v2.functional import clamp_bounding_boxes, to_image, to_pil_image
 
 
 IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
@@ -400,6 +400,12 @@ def make_image_pil(*args, **kwargs):
     return to_pil_image(make_image(*args, **kwargs))
 
 
+def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"):
+    y = torch.randint(0, canvas_size[0], size=(num_points, 1), dtype=dtype, device=device)
+    x = torch.randint(0, canvas_size[1], size=(num_points, 1), dtype=dtype, device=device)
+    return tv_tensors.KeyPoints(torch.cat((x, y), dim=-1), canvas_size=canvas_size)
+
+
 def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
@@ -461,9 +467,20 @@ def sample_position(values, max_value):
         parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
-    return tv_tensors.BoundingBoxes(
-        torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
-    )
+    out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device)
+    if tv_tensors.is_rotated_bounding_format(format):
+        # The rotated bounding boxes are not guaranteed to be within the canvas by design,
+        # so we apply clamping. We also add a 2 buffer to the canvas size to avoid
+        # numerical issues during the testing
+        buffer = 4
+        out_boxes = clamp_bounding_boxes(
+            out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer)
+        )
+        if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR:
+            out_boxes[:, :2] += buffer // 2
+        elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
+            out_boxes[:, :] += buffer // 2
+    return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size)
 
 
 def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"):
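
Standalone sketch of what the new make_keypoints() helper produces: a (num_points, 2) KeyPoints tensor of (x, y) pairs drawn inside the canvas. DEFAULT_SIZE is internal to common_utils.py, so a stand-in canvas size is used here:

import torch
from torchvision import tv_tensors

canvas_size = (32, 24)  # stand-in for DEFAULT_SIZE; (height, width)
num_points = 4
y = torch.randint(0, canvas_size[0], size=(num_points, 1))
x = torch.randint(0, canvas_size[1], size=(num_points, 1))
keypoints = tv_tensors.KeyPoints(torch.cat((x, y), dim=-1), canvas_size=canvas_size)
print(keypoints.shape)        # torch.Size([4, 2])
print(keypoints.canvas_size)  # (32, 24)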

test/test_image.py

Lines changed: 27 additions & 0 deletions
@@ -1,4 +1,5 @@
 import concurrent.futures
+import contextlib
 import glob
 import io
 import os
@@ -934,6 +935,32 @@ def test_decode_webp(decode_fun, scripted):
     img += 123  # make sure image buffer wasn't freed by underlying decoding lib
 
 
+@pytest.mark.parametrize("decode_fun", (decode_webp, decode_image))
+def test_decode_webp_grayscale(decode_fun, capfd):
+    encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".webp")))
+
+    # We warn at the C++ layer because for decode_image(), we don't do the image
+    # type dispatch until we get to the C++ version of decode_image(). We could
+    # warn at the Python layer in decode_webp(), but then users would get a
+    # double warning: one from the Python layer and one from the C++ layer.
+    #
+    # Because we use the TORCH_WARN_ONCE macro, we need to do this dance to
+    # temporarily always warn so we can test.
+    @contextlib.contextmanager
+    def set_always_warn():
+        torch._C._set_warnAlways(True)
+        yield
+        torch._C._set_warnAlways(False)
+
+    with set_always_warn():
+        img = decode_fun(encoded_bytes, mode=ImageReadMode.GRAY)
+        assert "Webp does not support grayscale conversions" in capfd.readouterr().err
+
+    # Note that because we do not support grayscale conversions, we expect
+    # that the number of color channels is still 3.
+    assert img.shape == (3, 100, 100)
+
+
 # This test is skipped by default because it requires webp images that we're not
 # including within the repo. The test images were downloaded manually from the
 # different pages of https://developers.google.com/speed/webp/gallery
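
The behaviour the new test pins down, as a hedged sketch against the public io API (the .webp path is a placeholder; per the test above, requesting GRAY leaves the channel count at 3 and emits a warning from the C++ layer):

from torchvision.io import ImageReadMode, decode_image, read_file

encoded = read_file("example.webp")  # placeholder path
img = decode_image(encoded, mode=ImageReadMode.GRAY)
print(img.shape[0])  # 3 -- grayscale conversion is not applied for webp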

test/test_ops.py

Lines changed: 3 additions & 3 deletions
@@ -929,6 +929,7 @@ def test_batched_nms_implementations(self, seed):
 
 class TestDeformConv:
     dtype = torch.float64
+    mps_dtype = torch.float32
 
     def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1):
         stride_h, stride_w = _pair(stride)
@@ -1050,12 +1051,11 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == len(graph_node_names[1])
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
-    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize("device", cpu_and_cuda_and_mps())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("batch_sz", (0, 33))
-    @pytest.mark.opcheck_only_one()
     def test_forward(self, device, contiguous, batch_sz, dtype=None):
-        dtype = dtype or self.dtype
+        dtype = self.mps_dtype if device == "mps" else dtype or self.dtype
         x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype)
         in_channels = 6
         out_channels = 2
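
What the widened parametrization covers, as a hedged sketch: deform_conv2d on the MPS backend in float32 (float64 is not supported there, hence the mps_dtype attribute above), falling back to CPU when MPS is unavailable. Shapes are illustrative:

import torch
from torchvision.ops import deform_conv2d

device = "mps" if torch.backends.mps.is_available() else "cpu"
dtype = torch.float32

x = torch.rand(1, 6, 10, 10, device=device, dtype=dtype)
weight = torch.rand(2, 6, 3, 3, device=device, dtype=dtype)
# offset needs 2 * kernel_h * kernel_w channels and the spatial size of the output
offset = torch.rand(1, 2 * 3 * 3, 8, 8, device=device, dtype=dtype)

out = deform_conv2d(x, offset, weight)  # stride=1, padding=0 -> 8x8 output
print(out.shape)  # torch.Size([1, 2, 8, 8])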
