
Commit ee6104d

Merge branch 'main' into deform-conv2d-mps
2 parents 751919e + 6473b77

36 files changed (+2314 -284 lines)

.github/workflows/build-wheels-aarch64-linux.yml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ jobs:
       os: linux-aarch64
       test-infra-repository: pytorch/test-infra
       test-infra-ref: main
-      with-cuda: disable
+      with-cuda: enable
   build:
     needs: generate-matrix
     strategy:

docs/source/training_references.rst

Lines changed: 2 additions & 2 deletions
@@ -19,9 +19,9 @@ guarantees.
 
 In general, these scripts rely on the latest (not yet released) pytorch version
 or the latest torchvision version. This means that to use them, **you might need
-to install the latest pytorch and torchvision versions**, with e.g.::
+to install the latest pytorch and torchvision versions** following the `official
+instructions <https://pytorch.org/get-started/locally/>`_.
 
-    conda install pytorch torchvision -c pytorch-nightly
 
 If you need to rely on an older stable version of pytorch or torchvision, e.g.
 torchvision 0.10, then it's safer to use the scripts from that corresponding

docs/source/transforms.rst

Lines changed: 3 additions & 1 deletion
@@ -101,7 +101,7 @@ range of the inputs.
 V1 or V2? Which one should I use?
 ---------------------------------
 
-**TL;DR** We recommending using the ``torchvision.transforms.v2`` transforms
+**TL;DR** We recommend using the ``torchvision.transforms.v2`` transforms
 instead of those in ``torchvision.transforms``. They're faster and they can do
 more things. Just change the import and you should be good to go. Moving
 forward, new features and improvements will only be considered for the v2
@@ -408,6 +408,7 @@ Miscellaneous
     v2.Lambda
     v2.SanitizeBoundingBoxes
     v2.ClampBoundingBoxes
+    v2.ClampKeyPoints
     v2.UniformTemporalSubsample
     v2.JPEG
 
@@ -421,6 +422,7 @@ Functionals
     v2.functional.erase
     v2.functional.sanitize_bounding_boxes
    v2.functional.clamp_bounding_boxes
+    v2.functional.clamp_keypoints
     v2.functional.uniform_temporal_subsample
     v2.functional.jpeg
 
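As the TL;DR in this hunk says, moving to v2 is mostly an import swap. A minimal sketch of the migration, not part of this commit (ToImage/ToDtype shown as the usual v2 replacement for ToTensor):

    import torch
    from torchvision.transforms import v2

    # Before (v1): from torchvision import transforms
    # After (v2): build the same pipeline from the v2 namespace.
    pipeline = v2.Compose([
        v2.RandomResizedCrop(size=(224, 224), antialias=True),
        v2.RandomHorizontalFlip(p=0.5),
        v2.ToImage(),                           # conversion half of ToTensor()
        v2.ToDtype(torch.float32, scale=True),  # scaling half of ToTensor()
    ])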

docs/source/tv_tensors.rst

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ info.
 
     Image
     Video
+    KeyPoints
     BoundingBoxFormat
     BoundingBoxes
     Mask

gallery/transforms/plot_tv_tensors.py

Lines changed: 10 additions & 1 deletion
@@ -46,11 +46,12 @@
 # Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
 # for the input data.
 #
-# :mod:`torchvision.tv_tensors` supports four types of TVTensors:
+# :mod:`torchvision.tv_tensors` supports five types of TVTensors:
 #
 # * :class:`~torchvision.tv_tensors.Image`
 # * :class:`~torchvision.tv_tensors.Video`
 # * :class:`~torchvision.tv_tensors.BoundingBoxes`
+# * :class:`~torchvision.tv_tensors.KeyPoints`
 # * :class:`~torchvision.tv_tensors.Mask`
 #
 # What can I do with a TVTensor?
@@ -96,6 +97,7 @@
 # :class:`~torchvision.tv_tensors.BoundingBoxes` requires the coordinate format as well as the size of the
 # corresponding image (``canvas_size``) alongside the actual values. These
 # metadata are required to properly transform the bounding boxes.
+# In a similar fashion, :class:`~torchvision.tv_tensors.KeyPoints` also require the ``canvas_size`` metadata to be added.
 
 bboxes = tv_tensors.BoundingBoxes(
     [[17, 16, 344, 495], [0, 10, 0, 10]],
@@ -104,6 +106,13 @@
 )
 print(bboxes)
 
+
+keypoints = tv_tensors.KeyPoints(
+    [[17, 16], [344, 495], [0, 10], [0, 10]],
+    canvas_size=image.shape[-2:]
+)
+print(keypoints)
+
 # %%
 # Using ``tv_tensors.wrap()``
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
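Since this gallery change introduces KeyPoints alongside the other TVTensors, here is a sketch of how the new class flows through a v2 transform. It assumes a torchvision build that already ships tv_tensors.KeyPoints (e.g. a nightly); the values mirror the gallery snippet:

    import torch
    from torchvision import tv_tensors
    from torchvision.transforms import v2

    # A 3x512x512 image and four (x, y) points on the same canvas.
    img = tv_tensors.Image(torch.randint(0, 256, (3, 512, 512), dtype=torch.uint8))
    pts = tv_tensors.KeyPoints(
        [[17, 16], [344, 495], [0, 10], [0, 10]],
        canvas_size=(512, 512),
    )

    # v2 transforms dispatch on the TVTensor subclass, so one call resizes
    # the image and rescales the point coordinates consistently.
    out_img, out_pts = v2.Resize(size=(256, 256), antialias=True)(img, pts)
    print(out_pts.canvas_size)  # expected: (256, 256)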

mypy.ini

Lines changed: 5 additions & 45 deletions
@@ -29,6 +29,10 @@ ignore_errors = True
 
 ignore_errors = True
 
+[mypy-torchvision.prototype.models.*]
+
+ignore_errors = True
+
 [mypy-torchvision.io.image.*]
 
 ignore_errors = True
@@ -41,54 +45,10 @@ ignore_errors = True
 
 ignore_errors = True
 
-[mypy-torchvision.models.densenet.*]
+[mypy-torchvision.models.*]
 
 ignore_errors=True
 
-[mypy-torchvision.models.maxvit.*]
-
-ignore_errors=True
-
-[mypy-torchvision.models.detection.anchor_utils]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.transform]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.roi_heads]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.faster_rcnn]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.mask_rcnn]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.keypoint_rcnn]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.retinanet]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.ssd]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.ssdlite]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.fcos]
-
-ignore_errors = True
-
 [mypy-torchvision.ops.*]
 
 ignore_errors = True
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+echo "Building vision dependencies and wheel started."
+
+# Set environment variables
+export SRC_PATH="$GITHUB_WORKSPACE/$SRC_DIR"
+export CMAKE_BUILD_TYPE="$BUILD_TYPE"
+export VCVARSALL_PATH="$DEPENDENCIES_DIR/VSBuildTools/VC/Auxiliary/Build/vcvarsall.bat"
+export CONDA_PREFIX="$DEPENDENCIES_DIR"
+export PATH="$PATH:$CONDA_PREFIX/Library/bin"
+export DISTUTILS_USE_SDK=1
+export TRIPLET_FILE="triplets/arm64-windows.cmake"
+export PYTORCH_VERSION="$PYTORCH_VERSION"
+export CHANNEL="$CHANNEL"
+
+# Dependencies
+mkdir -p "$DOWNLOADS_DIR"
+mkdir -p "$DEPENDENCIES_DIR"
+echo "*" > "$DOWNLOADS_DIR/.gitignore"
+echo "*" > "$DEPENDENCIES_DIR/.gitignore"
+
+# Install vcpkg
+cd "$DOWNLOADS_DIR" || exit
+git clone https://github.com/microsoft/vcpkg.git
+cd vcpkg || exit
+./bootstrap-vcpkg.sh
+
+# Set vcpkg to only build release packages
+echo "set(VCPKG_BUILD_TYPE release)" >> "$TRIPLET_FILE"
+
+# Install dependencies using vcpkg
+./vcpkg install libjpeg-turbo:arm64-windows --x-install-root="$DEPENDENCIES_DIR"
+./vcpkg install libwebp:arm64-windows --x-install-root="$DEPENDENCIES_DIR"
+./vcpkg install libpng[tools]:arm64-windows --x-install-root="$DEPENDENCIES_DIR"
+
+# Copy files using cp
+cp "$DEPENDENCIES_DIR/arm64-windows/lib/libpng16.lib" "$DEPENDENCIES_DIR/arm64-windows/lib/libpng.lib"
+cp "$DEPENDENCIES_DIR/arm64-windows/bin/libpng16.dll" "$DEPENDENCIES_DIR/arm64-windows/bin/libpng.dll"
+cp "$DEPENDENCIES_DIR/arm64-windows/bin/libpng16.pdb" "$DEPENDENCIES_DIR/arm64-windows/bin/libpng.pdb"
+mkdir -p "$DEPENDENCIES_DIR/Library/"
+cp -r "$DEPENDENCIES_DIR/arm64-windows/"* "$DEPENDENCIES_DIR/Library/"
+cp -r "$DEPENDENCIES_DIR/Library/tools/libpng/"* "$DEPENDENCIES_DIR/Library/bin/"
+cp -r "$DEPENDENCIES_DIR/Library/bin/"* "$SRC_PATH/torchvision"
+
+# Source directory
+cd "$SRC_PATH" || exit
+
+# Create virtual environment
+python -m pip install --upgrade pip
+python -m venv .venv
+echo "*" > .venv/.gitignore
+source .venv/Scripts/activate
+
+# Install dependencies
+pip install numpy==2.2.3
+
+if [ "$CHANNEL" = "release" ]; then
+    echo "Installing latest stable version of PyTorch."
+    # TODO: update when arm64 torch available on pypi
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/torch/
+elif [ "$CHANNEL" = "test" ]; then
+    echo "Installing PyTorch version $PYTORCH_VERSION."
+    pip3 install --pre torch=="$PYTORCH_VERSION" --index-url https://download.pytorch.org/whl/test
+else
+    echo "CHANNEL is not set, installing PyTorch from nightly."
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
+fi
+
+echo "Dependencies install finished successfully."

setup.py

Lines changed: 3 additions & 3 deletions
@@ -24,7 +24,7 @@
 NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
 # Note: the GPU video decoding stuff used to be called "video codec", which
 # isn't an accurate or descriptive name considering there are at least 2 other
-# video deocding backends in torchvision. I'm renaming this to "gpu video
+# video decoding backends in torchvision. I'm renaming this to "gpu video
 # decoder" where possible, keeping user facing names (like the env var below) to
 # the old scheme for BC.
 USE_GPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
@@ -211,7 +211,7 @@ def find_libpng():
         subprocess.run([libpng_config, "--version"], stdout=subprocess.PIPE).stdout.strip().decode("utf-8")
     )
     if png_version < min_version:
-        warnings.warn("libpng version {png_version} is less than minimum required version {min_version}")
+        warnings.warn(f"libpng version {png_version} is less than minimum required version {min_version}")
         return False, None, None, None
 
     include_dir = (
@@ -448,7 +448,7 @@ def find_ffmpeg_libraries():
 
     extensions.append(
         CppExtension(
-            # This is an aweful name. It should be "cpu_video_decoder". Keeping for BC.
+            # This is an awful name. It should be "cpu_video_decoder". Keeping for BC.
            "torchvision.video_reader",
             combined_src,
             include_dirs=[
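The second setup.py hunk fixes a missing f prefix: without it, Python emits the braces literally instead of interpolating the values. A standalone illustration of the bug class (version strings are made up):

    import warnings

    png_version, min_version = "1.5.30", "1.6.0"

    # Bug: plain string, the warning contains "{png_version}" verbatim.
    warnings.warn("libpng version {png_version} is less than minimum required version {min_version}")

    # Fix: f-string, the values are interpolated as in the hunk above.
    warnings.warn(f"libpng version {png_version} is less than minimum required version {min_version}")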

test/common_utils.py

Lines changed: 22 additions & 9 deletions
@@ -400,6 +400,12 @@ def make_image_pil(*args, **kwargs):
     return to_pil_image(make_image(*args, **kwargs))
 
 
+def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"):
+    y = torch.randint(0, canvas_size[0], size=(num_points, 1), dtype=dtype, device=device)
+    x = torch.randint(0, canvas_size[1], size=(num_points, 1), dtype=dtype, device=device)
+    return tv_tensors.KeyPoints(torch.cat((x, y), dim=-1), canvas_size=canvas_size)
+
+
 def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
@@ -417,6 +423,13 @@ def sample_position(values, max_value):
     format = tv_tensors.BoundingBoxFormat[format]
 
     dtype = dtype or torch.float32
+    int_dtype = dtype in (
+        torch.uint8,
+        torch.int8,
+        torch.int16,
+        torch.int32,
+        torch.int64,
+    )
 
     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -443,17 +456,17 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1, y1 = x, y
-        x3 = x1 + w * cos
-        y3 = y1 - w * sin
-        x2 = x3 + h * sin
-        y2 = y3 + h * cos
-        x4 = x1 + h * sin
-        y4 = y1 + h * cos
-        parts = (x1, y1, x3, y3, x2, y2, x4, y4)
+        x1 = torch.round(x) if int_dtype else x
+        y1 = torch.round(y) if int_dtype else y
+        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
+        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
+        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
+        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
+        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
+        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
-
     return tv_tensors.BoundingBoxes(
         torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
     )
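The XYXYXYXY rewrite above walks the rotated box corner by corner (the width edge first, then the height edge) and rounds after every step when the dtype is integral, so float error cannot accumulate across corners. A standalone sketch of the same construction; the function and argument names are mine, not the test helpers':

    import math

    def rotated_box_corners(x, y, w, h, degrees, round_each_step=False):
        # Image coordinates: y grows downward, so a width edge rotated by
        # +degrees moves up (y decreases); the height edge is perpendicular.
        r = math.radians(degrees)
        cos, sin = math.cos(r), math.sin(r)
        q = round if round_each_step else (lambda v: v)
        x1, y1 = q(x), q(y)
        x2, y2 = q(x1 + w * cos), q(y1 - w * sin)  # end of the width edge
        x3, y3 = q(x2 + h * sin), q(y2 + h * cos)  # opposite corner
        x4, y4 = q(x1 + h * sin), q(y1 + h * cos)  # end of the height edge
        return (x1, y1, x2, y2, x3, y3, x4, y4)

    # With step-wise rounding, an integer-dtype box keeps integral corners:
    print(rotated_box_corners(10, 20, 100, 50, degrees=30, round_each_step=True))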
