
Commit ee6104d

Merge branch 'main' into deform-conv2d-mps
2 parents 751919e + 6473b77

36 files changed (+2314 -284 lines)

.github/workflows/build-wheels-aarch64-linux.yml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ jobs:
       os: linux-aarch64
       test-infra-repository: pytorch/test-infra
       test-infra-ref: main
-      with-cuda: disable
+      with-cuda: enable
   build:
     needs: generate-matrix
     strategy:

docs/source/training_references.rst

Lines changed: 2 additions & 2 deletions
@@ -19,9 +19,9 @@ guarantees.
 
 In general, these scripts rely on the latest (not yet released) pytorch version
 or the latest torchvision version. This means that to use them, **you might need
-to install the latest pytorch and torchvision versions**, with e.g.::
+to install the latest pytorch and torchvision versions** following the `official
+instructions <https://pytorch.org/get-started/locally/>`_.
 
-    conda install pytorch torchvision -c pytorch-nightly
 
 If you need to rely on an older stable version of pytorch or torchvision, e.g.
 torchvision 0.10, then it's safer to use the scripts from that corresponding

docs/source/transforms.rst

Lines changed: 3 additions & 1 deletion
@@ -101,7 +101,7 @@ range of the inputs.
 V1 or V2? Which one should I use?
 ---------------------------------
 
-**TL;DR** We recommending using the ``torchvision.transforms.v2`` transforms
+**TL;DR** We recommend using the ``torchvision.transforms.v2`` transforms
 instead of those in ``torchvision.transforms``. They're faster and they can do
 more things. Just change the import and you should be good to go. Moving
 forward, new features and improvements will only be considered for the v2
@@ -408,6 +408,7 @@ Miscellaneous
     v2.Lambda
     v2.SanitizeBoundingBoxes
     v2.ClampBoundingBoxes
+    v2.ClampKeyPoints
     v2.UniformTemporalSubsample
     v2.JPEG
 
@@ -421,6 +422,7 @@ Functionals
     v2.functional.erase
     v2.functional.sanitize_bounding_boxes
    v2.functional.clamp_bounding_boxes
+    v2.functional.clamp_keypoints
     v2.functional.uniform_temporal_subsample
     v2.functional.jpeg
 
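As the TL;DR in this hunk says, moving to v2 is mostly an import swap. A minimal sketch of the migration, not part of this commit (ToImage/ToDtype shown as the usual v2 replacement for ToTensor):

    import torch
    from torchvision.transforms import v2

    # Before (v1): from torchvision import transforms
    # After (v2): build the same pipeline from the v2 namespace.
    pipeline = v2.Compose([
        v2.RandomResizedCrop(size=(224, 224), antialias=True),
        v2.RandomHorizontalFlip(p=0.5),
        v2.ToImage(),                           # conversion half of ToTensor()
        v2.ToDtype(torch.float32, scale=True),  # scaling half of ToTensor()
    ])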

docs/source/tv_tensors.rst

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ info.
 
     Image
     Video
+    KeyPoints
     BoundingBoxFormat
     BoundingBoxes
     Mask

gallery/transforms/plot_tv_tensors.py

Lines changed: 10 additions & 1 deletion
@@ -46,11 +46,12 @@
 # Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
 # for the input data.
 #
-# :mod:`torchvision.tv_tensors` supports four types of TVTensors:
+# :mod:`torchvision.tv_tensors` supports five types of TVTensors:
 #
 # * :class:`~torchvision.tv_tensors.Image`
 # * :class:`~torchvision.tv_tensors.Video`
 # * :class:`~torchvision.tv_tensors.BoundingBoxes`
+# * :class:`~torchvision.tv_tensors.KeyPoints`
 # * :class:`~torchvision.tv_tensors.Mask`
 #
 # What can I do with a TVTensor?
@@ -96,6 +97,7 @@
 # :class:`~torchvision.tv_tensors.BoundingBoxes` requires the coordinate format as well as the size of the
 # corresponding image (``canvas_size``) alongside the actual values. These
 # metadata are required to properly transform the bounding boxes.
+# In a similar fashion, :class:`~torchvision.tv_tensors.KeyPoints` also require the ``canvas_size`` metadata to be added.
 
 bboxes = tv_tensors.BoundingBoxes(
     [[17, 16, 344, 495], [0, 10, 0, 10]],
@@ -104,6 +106,13 @@
 )
 print(bboxes)
 
+
+keypoints = tv_tensors.KeyPoints(
+    [[17, 16], [344, 495], [0, 10], [0, 10]],
+    canvas_size=image.shape[-2:]
+)
+print(keypoints)
+
 # %%
 # Using ``tv_tensors.wrap()``
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
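Since this gallery change introduces KeyPoints alongside the other TVTensors, here is a sketch of how the new class flows through a v2 transform. It assumes a torchvision build that already ships tv_tensors.KeyPoints (e.g. a nightly); the values mirror the gallery snippet:

    import torch
    from torchvision import tv_tensors
    from torchvision.transforms import v2

    # A 3x512x512 image and four (x, y) points on the same canvas.
    img = tv_tensors.Image(torch.randint(0, 256, (3, 512, 512), dtype=torch.uint8))
    pts = tv_tensors.KeyPoints(
        [[17, 16], [344, 495], [0, 10], [0, 10]],
        canvas_size=(512, 512),
    )

    # v2 transforms dispatch on the TVTensor subclass, so one call resizes
    # the image and rescales the point coordinates consistently.
    out_img, out_pts = v2.Resize(size=(256, 256), antialias=True)(img, pts)
    print(out_pts.canvas_size)  # expected: (256, 256)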

mypy.ini

Lines changed: 5 additions & 45 deletions
@@ -29,6 +29,10 @@ ignore_errors = True
 
 ignore_errors = True
 
+[mypy-torchvision.prototype.models.*]
+
+ignore_errors = True
+
 [mypy-torchvision.io.image.*]
 
 ignore_errors = True
@@ -41,54 +45,10 @@ ignore_errors = True
 
 ignore_errors = True
 
-[mypy-torchvision.models.densenet.*]
+[mypy-torchvision.models.*]
 
 ignore_errors=True
 
-[mypy-torchvision.models.maxvit.*]
-
-ignore_errors=True
-
-[mypy-torchvision.models.detection.anchor_utils]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.transform]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.roi_heads]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.faster_rcnn]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.mask_rcnn]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.keypoint_rcnn]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.retinanet]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.ssd]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.ssdlite]
-
-ignore_errors = True
-
-[mypy-torchvision.models.detection.fcos]
-
-ignore_errors = True
-
 [mypy-torchvision.ops.*]
 
 ignore_errors = True
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+echo "Building vision dependencies and wheel started."
+
+# Set environment variables
+export SRC_PATH="$GITHUB_WORKSPACE/$SRC_DIR"
+export CMAKE_BUILD_TYPE="$BUILD_TYPE"
+export VCVARSALL_PATH="$DEPENDENCIES_DIR/VSBuildTools/VC/Auxiliary/Build/vcvarsall.bat"
+export CONDA_PREFIX="$DEPENDENCIES_DIR"
+export PATH="$PATH:$CONDA_PREFIX/Library/bin"
+export DISTUTILS_USE_SDK=1
+export TRIPLET_FILE="triplets/arm64-windows.cmake"
+export PYTORCH_VERSION="$PYTORCH_VERSION"
+export CHANNEL="$CHANNEL"
+
+# Dependencies
+mkdir -p "$DOWNLOADS_DIR"
+mkdir -p "$DEPENDENCIES_DIR"
+echo "*" > "$DOWNLOADS_DIR/.gitignore"
+echo "*" > "$DEPENDENCIES_DIR/.gitignore"
+
+# Install vcpkg
+cd "$DOWNLOADS_DIR" || exit
+git clone https://github.com/microsoft/vcpkg.git
+cd vcpkg || exit
+./bootstrap-vcpkg.sh
+
+# Set vcpkg to only build release packages
+echo "set(VCPKG_BUILD_TYPE release)" >> "$TRIPLET_FILE"
+
+# Install dependencies using vcpkg
+./vcpkg install libjpeg-turbo:arm64-windows --x-install-root="$DEPENDENCIES_DIR"
+./vcpkg install libwebp:arm64-windows --x-install-root="$DEPENDENCIES_DIR"
+./vcpkg install libpng[tools]:arm64-windows --x-install-root="$DEPENDENCIES_DIR"
+
+# Copy files using cp
+cp "$DEPENDENCIES_DIR/arm64-windows/lib/libpng16.lib" "$DEPENDENCIES_DIR/arm64-windows/lib/libpng.lib"
+cp "$DEPENDENCIES_DIR/arm64-windows/bin/libpng16.dll" "$DEPENDENCIES_DIR/arm64-windows/bin/libpng.dll"
+cp "$DEPENDENCIES_DIR/arm64-windows/bin/libpng16.pdb" "$DEPENDENCIES_DIR/arm64-windows/bin/libpng.pdb"
+mkdir -p "$DEPENDENCIES_DIR/Library/"
+cp -r "$DEPENDENCIES_DIR/arm64-windows/"* "$DEPENDENCIES_DIR/Library/"
+cp -r "$DEPENDENCIES_DIR/Library/tools/libpng/"* "$DEPENDENCIES_DIR/Library/bin/"
+cp -r "$DEPENDENCIES_DIR/Library/bin/"* "$SRC_PATH/torchvision"
+
+# Source directory
+cd "$SRC_PATH" || exit
+
+# Create virtual environment
+python -m pip install --upgrade pip
+python -m venv .venv
+echo "*" > .venv/.gitignore
+source .venv/Scripts/activate
+
+# Install dependencies
+pip install numpy==2.2.3
+
+if [ "$CHANNEL" = "release" ]; then
+    echo "Installing latest stable version of PyTorch."
+    # TODO: update when arm64 torch available on pypi
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/torch/
+elif [ "$CHANNEL" = "test" ]; then
+    echo "Installing PyTorch version $PYTORCH_VERSION."
+    pip3 install --pre torch=="$PYTORCH_VERSION" --index-url https://download.pytorch.org/whl/test
+else
+    echo "CHANNEL is not set, installing PyTorch from nightly."
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
+fi
+
+echo "Dependencies install finished successfully."

setup.py

Lines changed: 3 additions & 3 deletions
@@ -24,7 +24,7 @@
 NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
 # Note: the GPU video decoding stuff used to be called "video codec", which
 # isn't an accurate or descriptive name considering there are at least 2 other
-# video deocding backends in torchvision. I'm renaming this to "gpu video
+# video decoding backends in torchvision. I'm renaming this to "gpu video
 # decoder" where possible, keeping user facing names (like the env var below) to
 # the old scheme for BC.
 USE_GPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
@@ -211,7 +211,7 @@ def find_libpng():
         subprocess.run([libpng_config, "--version"], stdout=subprocess.PIPE).stdout.strip().decode("utf-8")
     )
     if png_version < min_version:
-        warnings.warn("libpng version {png_version} is less than minimum required version {min_version}")
+        warnings.warn(f"libpng version {png_version} is less than minimum required version {min_version}")
         return False, None, None, None
 
     include_dir = (
@@ -448,7 +448,7 @@ def find_ffmpeg_libraries():
 
     extensions.append(
         CppExtension(
-            # This is an aweful name. It should be "cpu_video_decoder". Keeping for BC.
+            # This is an awful name. It should be "cpu_video_decoder". Keeping for BC.
            "torchvision.video_reader",
             combined_src,
             include_dirs=[
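The second setup.py hunk fixes a missing f prefix: without it, Python emits the braces literally instead of interpolating the values. A standalone illustration of the bug class (version strings are made up):

    import warnings

    png_version, min_version = "1.5.30", "1.6.0"

    # Bug: plain string, the warning contains "{png_version}" verbatim.
    warnings.warn("libpng version {png_version} is less than minimum required version {min_version}")

    # Fix: f-string, the values are interpolated as in the hunk above.
    warnings.warn(f"libpng version {png_version} is less than minimum required version {min_version}")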

test/common_utils.py

Lines changed: 22 additions & 9 deletions
@@ -400,6 +400,12 @@ def make_image_pil(*args, **kwargs):
     return to_pil_image(make_image(*args, **kwargs))
 
 
+def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"):
+    y = torch.randint(0, canvas_size[0], size=(num_points, 1), dtype=dtype, device=device)
+    x = torch.randint(0, canvas_size[1], size=(num_points, 1), dtype=dtype, device=device)
+    return tv_tensors.KeyPoints(torch.cat((x, y), dim=-1), canvas_size=canvas_size)
+
+
 def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
@@ -417,6 +423,13 @@ def sample_position(values, max_value):
     format = tv_tensors.BoundingBoxFormat[format]
 
     dtype = dtype or torch.float32
+    int_dtype = dtype in (
+        torch.uint8,
+        torch.int8,
+        torch.int16,
+        torch.int32,
+        torch.int64,
+    )
 
     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -443,17 +456,17 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1, y1 = x, y
-        x3 = x1 + w * cos
-        y3 = y1 - w * sin
-        x2 = x3 + h * sin
-        y2 = y3 + h * cos
-        x4 = x1 + h * sin
-        y4 = y1 + h * cos
-        parts = (x1, y1, x3, y3, x2, y2, x4, y4)
+        x1 = torch.round(x) if int_dtype else x
+        y1 = torch.round(y) if int_dtype else y
+        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
+        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
+        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
+        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
+        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
+        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
-
     return tv_tensors.BoundingBoxes(
         torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
     )
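The XYXYXYXY rewrite above walks the rotated box corner by corner (the width edge first, then the height edge) and rounds after every step when the dtype is integral, so float error cannot accumulate across corners. A standalone sketch of the same construction; the function and argument names are mine, not the test helpers':

    import math

    def rotated_box_corners(x, y, w, h, degrees, round_each_step=False):
        # Image coordinates: y grows downward, so a width edge rotated by
        # +degrees moves up (y decreases); the height edge is perpendicular.
        r = math.radians(degrees)
        cos, sin = math.cos(r), math.sin(r)
        q = round if round_each_step else (lambda v: v)
        x1, y1 = q(x), q(y)
        x2, y2 = q(x1 + w * cos), q(y1 - w * sin)  # end of the width edge
        x3, y3 = q(x2 + h * sin), q(y2 + h * cos)  # opposite corner
        x4, y4 = q(x1 + h * sin), q(y1 + h * cos)  # end of the height edge
        return (x1, y1, x2, y2, x3, y3, x4, y4)

    # With step-wise rounding, an integer-dtype box keeps integral corners:
    print(rotated_box_corners(10, 20, 100, 50, degrees=30, round_each_step=True))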
