From aac481cb9f65647d53825dd9a3de48d9ff76df17 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 6 Feb 2025 19:25:20 +1100
Subject: [PATCH] go back to state of last passing run

see https://github.com/conda-forge/pytorch-cpu-feedstock/commit/dfadf157ebdb735d245a3c5f6f9d4cce46f92dd7
---
 .github/workflows/conda-build.yml             |   2 +-
 README.md                                     |   8 -
 conda-forge.yml                               |   2 +-
 recipe/bld.bat                                |  76 ++--
 recipe/build.sh                               |  76 +---
 recipe/cmake_test/CMakeLists.txt              |   4 -
 recipe/meta.yaml                              | 106 +----
 ...-of-python-3-and-error-without-numpy.patch |   9 +-
 recipe/patches/0002-Help-find-numpy.patch     |   9 +-
 ...03-Add-USE_SYSTEM_NVTX-option-138287.patch |  13 +-
 .../patches/0004-Update-sympy-version.patch   |   9 +-
 .../0005-Fix-duplicate-linker-script.patch    |   9 +-
 ...kle-error-in-serialization.py-136034.patch |   9 +-
 ...verwrite-ld-with-environment-variabl.patch |   9 +-
 ...-Allow-overriding-CUDA-related-paths.patch |  11 +-
 ...st-test_linalg.py-for-NumPy-2-136800.patch |   9 +-
 ...est-failures-in-test_torch.py-137740.patch |   9 +-
 ...AS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch |   9 +-
 recipe/patches/0012-fix-issue-142484.patch    |   9 +-
 recipe/patches/0013-Fix-FindOpenBLAS.patch    |   9 +-
 ...Enable-Python-3.13-on-windows-138095.patch |  11 +-
 ...tils.cpp_extension.include_paths-use.patch |  11 +-
 ...oint-include-paths-to-PREFIX-include.patch |   9 +-
 ...nda-prefix-to-inductor-include-paths.patch |  27 --
 ...E_DIR-relative-to-TORCH_INSTALL_PREF.patch |  25 --
 ...ON-lib-from-CMake-install-TARGETS-di.patch | 158 --------
 ...e-in-test_mutable_custom_op_fixed_la.patch |  57 ---
 ...-find_package-CUDA-in-caffe2-CMake-m.patch | 367 ------------------
 ...ON-lib-from-CMake-install-directives.patch |  25 --
 ...1-switch-away-from-find_package-CUDA.patch |  22 --
 recipe/third_party/CMake/Copyright.txt        | 136 -------
 31 files changed, 165 insertions(+), 1080 deletions(-)
 delete mode 100644 recipe/cmake_test/CMakeLists.txt
 delete mode 100644 recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
 delete mode 100644 recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
 delete mode 100644 recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
 delete mode 100644 recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
 delete mode 100644 recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
 delete mode 100644 recipe/patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
 delete mode 100644 recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
 delete mode 100644 recipe/third_party/CMake/Copyright.txt

diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml
index 3c036ad9f..40a005a95 100644
--- a/.github/workflows/conda-build.yml
+++ b/.github/workflows/conda-build.yml
@@ -16,7 +16,7 @@ jobs:
   build:
     name: ${{ matrix.CONFIG }}
     runs-on: ${{ matrix.runs_on }}
-    timeout-minutes: 1080
+    timeout-minutes: 900
     strategy:
       fail-fast: false
       matrix:
diff --git a/README.md b/README.md
index 4ce0f7506..22e879dc5 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,6 @@ Summary: PyTorch is an optimized tensor library for deep learning using GPUs and
 
 Development: https://github.com/pytorch/pytorch
 
-Documentation: https://pytorch.org/docs/
-
-PyTorch is a Python package that provides two high-level features:
-  - Tensor computation (like NumPy) with strong GPU acceleration
-  - Deep neural networks built on a tape-based autograd system
-You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed.
-
-
 Current build status
 ====================
 
diff --git a/conda-forge.yml b/conda-forge.yml
index e8a1366ff..47ae951d6 100644
--- a/conda-forge.yml
+++ b/conda-forge.yml
@@ -16,7 +16,7 @@ github:
   tooling_branch_name: main
 github_actions:
   self_hosted: true
-  timeout_minutes: 1080
+  timeout_minutes: 900
   triggers:
   - push
   - pull_request
diff --git a/recipe/bld.bat b/recipe/bld.bat
index 4089b425f..e4d0bae51 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -7,12 +7,6 @@ if EXIST pyproject.toml (
   if %ERRORLEVEL% neq 0 exit 1
 )
 
-@REM The PyTorch test suite includes some symlinks, which aren't resolved on Windows, leading to packaging errors.
-@REM ATTN! These change and have to be updated manually, often with each release.
-@REM (no current symlinks being packaged. Leaving this information here as it took some months to find the issue. Look out
-@REM for a failure with error message: "conda_package_handling.exceptions.ArchiveCreationError: <somefile> Cannot stat
-@REM while writing file")
-
 set PYTORCH_BUILD_VERSION=%PKG_VERSION%
 @REM Always pass 0 to avoid appending ".post" to version string.
 @REM https://github.com/conda-forge/pytorch-cpu-feedstock/issues/315
@@ -27,9 +21,12 @@ if "%blas_impl%" == "generic" (
     SET BLAS=MKL
 )
 
+@REM TODO(baszalmstra): Figure out if we need these flags
+SET "USE_NUMA=0"
+SET "USE_ITT=0"
+
 if "%PKG_NAME%" == "pytorch" (
   set "PIP_ACTION=install"
-  set "PIP_VERBOSITY=-v"
   @REM We build libtorch for a specific python version.
   @REM This ensures its only build once. However, when that version changes
   @REM we need to make sure to update that here.
@@ -59,54 +56,43 @@ if "%PKG_NAME%" == "pytorch" (
   @REM For the main script we just build a wheel for so that the C++/CUDA
   @REM parts are built. Then they are reused in each python version.
   set "PIP_ACTION=wheel"
-  set "PIP_VERBOSITY=-vvv"
 )
 
-set "BUILD_CUSTOM_PROTOBUF=OFF"
-set "USE_LITE_PROTO=ON"
+if not "%cuda_compiler_version%" == "None" (
+    set USE_CUDA=1
 
-@REM TODO(baszalmstra): Figure out if we need these flags
-SET "USE_ITT=0"
-SET "USE_NUMA=0"
+    @REM set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
+    @REM set CUDA_BIN_PATH=%CUDA_PATH%\bin
 
-@REM TODO(baszalmstra): There are linker errors because of mixing Intel OpenMP (iomp) and Microsoft OpenMP (vcomp)
-set "USE_OPENMP=OFF"
+    set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX
 
-@REM Use our Pybind11, Eigen, sleef
-set USE_SYSTEM_EIGEN_INSTALL=1
-set USE_SYSTEM_PYBIND11=1
-set USE_SYSTEM_SLEEF=1
+    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
 
-if not "%cuda_compiler_version%" == "None" (
-    set USE_CUDA=1
     set USE_STATIC_CUDNN=0
+    set MAGMA_HOME=%PREFIX%
+
     @REM NCCL is not available on windows
     set USE_NCCL=0
     set USE_STATIC_NCCL=0
 
-    @REM set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
-    @REM set CUDA_BIN_PATH=%CUDA_PATH%\bin
-
-    set "TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
-    set "TORCH_NVCC_FLAGS=-Xfatbin -compress-all"
-
     set MAGMA_HOME=%LIBRARY_PREFIX%
+
     set "PATH=%CUDA_BIN_PATH%;%PATH%"
+
     set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include
-    @REM turn off very noisy nvcc warnings
-    set "CUDAFLAGS=-w --ptxas-options=-w"
+
 ) else (
     set USE_CUDA=0
-    @REM MKLDNN is an Apache-2.0 licensed library for DNNs and is used
-    @REM for CPU builds. Not to be confused with MKL.
-    set "USE_MKLDNN=1"
-
     @REM On windows, env vars are case-insensitive and setup.py
     @REM passes all env vars starting with CUDA_*, CMAKE_* to
     @REM to cmake
     set "cuda_compiler_version="
     set "cuda_compiler="
     set "CUDA_VERSION="
+
+    @REM MKLDNN is an Apache-2.0 licensed library for DNNs and is used
+    @REM for CPU builds. Not to be confused with MKL.
+    set "USE_MKLDNN=1"
 )
 
 set DISTUTILS_USE_SDK=1
@@ -130,12 +116,19 @@ set "INSTALL_TEST=0"
 set "BUILD_TEST=0"
 
 set "libuv_ROOT=%LIBRARY_PREFIX%"
+set "USE_SYSTEM_SLEEF=ON"
 
 @REM uncomment to debug cmake build
 @REM set "CMAKE_VERBOSE_MAKEFILE=1"
 
+set "BUILD_CUSTOM_PROTOBUF=OFF"
+set "USE_LITE_PROTO=ON"
+
+@REM TODO(baszalmstra): There are linker errors because of mixing Intel OpenMP (iomp) and Microsoft OpenMP (vcomp)
+set "USE_OPENMP=OFF"
+
 @REM The activation script for cuda-nvcc doesnt add the CUDA_CFLAGS on windows.
-@REM Therefore we do this manually here. See:
+@REM Therefore we do this manually here. See:
 @REM https://github.com/conda-forge/cuda-nvcc-feedstock/issues/47
 echo "CUDA_CFLAGS=%CUDA_CFLAGS%"
 set "CUDA_CFLAGS=-I%PREFIX%/Library/include -I%BUILD_PREFIX%/Library/include"
@@ -162,7 +155,7 @@ if EXIST build (
     if %ERRORLEVEL% neq 0 exit 1
 )
 
-%PYTHON% -m pip %PIP_ACTION% . --no-build-isolation --no-deps %PIP_VERBOSITY% --no-clean
+%PYTHON% -m pip %PIP_ACTION% . --no-build-isolation --no-deps -vvv --no-clean
 if %ERRORLEVEL% neq 0 exit 1
 
 @REM Here we split the build into two parts.
@@ -190,12 +183,19 @@ if "%PKG_NAME%" == "libtorch" (
     pushd torch-%PKG_VERSION%
     if %ERRORLEVEL% neq 0 exit 1
 
+    @REM Do not package `fmt.lib` (and its metadata); delete it before the move into
+    @REM %LIBRARY_BIN% because it may already exist in the host environment before installation
+    del torch\lib\fmt.lib torch\lib\pkgconfig\fmt.pc
+    if %ERRORLEVEL% neq 0 exit 1
+    @REM also delete rest of fmt metadata
+    rmdir /s /q torch\lib\cmake\fmt
+
     @REM Move the binaries into the packages site-package directory
     @REM the only content of torch\bin, {asmjit,fbgemm}.dll, also exists in torch\lib
-    robocopy /NP /NFL /NDL /NJH /E torch\bin\ %LIBRARY_BIN%\ torch*.dll c10.dll shm.dll asmjit.dll fbgemm.dll
+    robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_BIN%\ torch*.dll c10.dll shm.dll asmjit.dll fbgemm.dll
     robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_LIB%\ torch*.lib c10.lib shm.lib asmjit.lib fbgemm.lib
     if not "%cuda_compiler_version%" == "None" (
-        robocopy /NP /NFL /NDL /NJH /E torch\bin\ %LIBRARY_BIN%\ c10_cuda.dll caffe2_nvrtc.dll
+        robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_BIN%\ c10_cuda.dll caffe2_nvrtc.dll
         robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_LIB%\ c10_cuda.lib caffe2_nvrtc.lib
     )
     robocopy /NP /NFL /NDL /NJH /E torch\share\ %LIBRARY_PREFIX%\share
@@ -216,7 +216,7 @@ if "%PKG_NAME%" == "libtorch" (
     if %ERRORLEVEL% neq 0 exit 1
 ) else if "%PKG_NAME%" == "pytorch" (
     @REM Move libtorch_python and remove the other directories afterwards.
-    robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\bin\ %LIBRARY_BIN%\ torch_python.dll
+    robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\lib\ %LIBRARY_BIN%\ torch_python.dll
     robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\lib\ %LIBRARY_LIB%\ torch_python.lib
     robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\lib\ %LIBRARY_LIB%\ _C.lib
     rmdir /s /q %SP_DIR%\torch\lib
diff --git a/recipe/build.sh b/recipe/build.sh
index 22dde8f0e..57044b090 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -1,10 +1,8 @@
 #!/bin/bash
 
-set -ex
+echo "=== Building ${PKG_NAME} (py: ${PY_VER}) ==="
 
-echo "#########################################################################"
-echo "Building ${PKG_NAME} (py: ${PY_VER}) using BLAS implementation $blas_impl"
-echo "#########################################################################"
+set -ex
 
 # This is used to detect if it's in the process of building pytorch
 export IN_PYTORCH_BUILD=1
@@ -22,22 +20,9 @@ rm -rf pyproject.toml
 export USE_CUFILE=0
 export USE_NUMA=0
 export USE_ITT=0
-
-#################### ADJUST COMPILER AND LINKER FLAGS #####################
-# Pytorch's build system doesn't like us setting the c++ standard through CMAKE_CXX_FLAGS
-# and will issue a warning.  We need to use at least C++17 to match the abseil ABI, see
-# https://github.com/conda-forge/abseil-cpp-feedstock/issues/45, which pytorch 2.5 uses already:
-# https://github.com/pytorch/pytorch/blob/v2.5.1/CMakeLists.txt#L36-L48
-export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-std=c++[0-9][0-9]//g')"
-# The below three lines expose symbols that would otherwise be hidden or
-# optimised away. They were here before, so removing them would potentially
-# break users' programs
 export CFLAGS="$(echo $CFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
 export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
 export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,--as-needed//g')"
-# The default conda LDFLAGs include -Wl,-dead_strip_dylibs, which removes all the
-# MKL sequential, core, etc. libraries, resulting in a "Symbol not found: _mkl_blas_caxpy"
-# error on osx-64.
 export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,-dead_strip_dylibs//g')"
 export LDFLAGS_LD="$(echo $LDFLAGS_LD | sed 's/-dead_strip_dylibs//g')"
 if [[ "$c_compiler" == "clang" ]]; then
@@ -60,7 +45,6 @@ fi
 # can be imported on system without a GPU
 LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}"
 
-################ CONFIGURE CMAKE FOR CONDA ENVIRONMENT ###################
 export CMAKE_GENERATOR=Ninja
 export CMAKE_LIBRARY_PATH=$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH
 export CMAKE_PREFIX_PATH=$PREFIX
@@ -89,8 +73,6 @@ export USE_SYSTEM_SLEEF=1
 # use our protobuf
 export BUILD_CUSTOM_PROTOBUF=OFF
 rm -rf $PREFIX/bin/protoc
-export USE_SYSTEM_PYBIND11=1
-export USE_SYSTEM_EIGEN_INSTALL=1
 
 # prevent six from being downloaded
 > third_party/NNPACK/cmake/DownloadSix.cmake
@@ -116,28 +98,17 @@ if [[ "${CI}" == "github_actions" ]]; then
     # reduce parallelism to avoid getting OOM-killed on
     # cirun-openstack-gpu-2xlarge, which has 32GB RAM, 8 CPUs
     export MAX_JOBS=4
-elif [[ "${CI}" == "azure" ]]; then
-    export MAX_JOBS=${CPU_COUNT}
 else
-    # Leave a spare core for other tasks, per common practice.
-    # Reducing further can help with out-of-memory errors.
-    export MAX_JOBS=$((CPU_COUNT > 1 ? CPU_COUNT - 1 : 1))
+    export MAX_JOBS=${CPU_COUNT}
 fi
 
-case "$blas_impl" in
-    "generic")
-        # Fake openblas
-        export BLAS=OpenBLAS
-        export OpenBLAS_HOME=${PREFIX}
-        ;;
-    "mkl")
-        export BLAS=MKL
-        ;;
-    *)
-        echo "[ERROR] Unsupported BLAS implementation '${blas_impl}'" >&2
-        exit 1
-        ;;
-esac
+if [[ "$blas_impl" == "generic" ]]; then
+    # Fake openblas
+    export BLAS=OpenBLAS
+    export OpenBLAS_HOME=${PREFIX}
+else
+    export BLAS=MKL
+fi
 
 if [[ "$PKG_NAME" == "pytorch" ]]; then
   # Trick Cmake into thinking python hasn't changed
@@ -176,9 +147,11 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
     # all of them.
     export CUDAToolkit_BIN_DIR=${BUILD_PREFIX}/bin
     export CUDAToolkit_ROOT_DIR=${PREFIX}
+    if [[ "${target_platform}" != "${build_platform}" ]]; then
+        export CUDA_TOOLKIT_ROOT=${PREFIX}
+    fi
     # for CUPTI
     export CUDA_TOOLKIT_ROOT_DIR=${PREFIX}
-    export CUDAToolkit_ROOT=${PREFIX}
     case ${target_platform} in
         linux-64)
             export CUDAToolkit_TARGET_DIR=${PREFIX}/targets/x86_64-linux
@@ -190,24 +163,12 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
             echo "unknown CUDA arch, edit build.sh"
             exit 1
     esac
-
-    # Compatibility matrix for update: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
-    # Warning from pytorch v1.12.1: In the future we will require one to
-    # explicitly pass TORCH_CUDA_ARCH_LIST to cmake instead of implicitly
-    # setting it as an env variable.
-    # Doing this is nontrivial given that we're using setup.py as an entry point, but should
-    # be addressed to pre-empt upstream changing it, as it probably won't result in a failed
-    # configuration.
-    #
-    # See:
-    # https://pytorch.org/docs/stable/cpp_extension.html (Compute capabilities)
-    # https://github.com/pytorch/pytorch/blob/main/.ci/manywheel/build_cuda.sh
     case ${cuda_compiler_version} in
-        12.[0-6])
+        12.6)
             export TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
             ;;
         *)
-            echo "No CUDA architecture list exists for CUDA v${cuda_compiler_version}. See build.sh for information on adding one."
+            echo "unsupported cuda version. edit build.sh"
             exit 1
     esac
     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
@@ -219,8 +180,6 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
     export USE_STATIC_CUDNN=0
     export MAGMA_HOME="${PREFIX}"
     export USE_MAGMA=1
-    # turn off noisy nvcc warnings
-    export CMAKE_CUDA_FLAGS="-w --ptxas-options=-w"
 else
     if [[ "$target_platform" != *-64 ]]; then
       # Breakpad seems to not work on aarch64 or ppc64le
@@ -244,8 +203,7 @@ case ${PKG_NAME} in
 
     mv build/lib.*/torch/bin/* ${PREFIX}/bin/
     mv build/lib.*/torch/lib/* ${PREFIX}/lib/
-    # need to merge these now because we're using system pybind11, meaning the destination directory is not empty
-    rsync -a build/lib.*/torch/share/* ${PREFIX}/share/
+    mv build/lib.*/torch/share/* ${PREFIX}/share/
     mv build/lib.*/torch/include/{ATen,caffe2,tensorpipe,torch,c10} ${PREFIX}/include/
     rm ${PREFIX}/lib/libtorch_python.*
 
@@ -253,7 +211,7 @@ case ${PKG_NAME} in
     cp build/CMakeCache.txt build/CMakeCache.txt.orig
     ;;
   pytorch)
-    $PREFIX/bin/python -m pip install . --no-deps --no-build-isolation -v --no-clean \
+    $PREFIX/bin/python -m pip install . --no-deps -vvv --no-clean \
         | sed "s,${CXX},\$\{CXX\},g" \
         | sed "s,${PREFIX},\$\{PREFIX\},g"
     # Keep this in ${PREFIX}/lib so that the library can be found by
diff --git a/recipe/cmake_test/CMakeLists.txt b/recipe/cmake_test/CMakeLists.txt
deleted file mode 100644
index 716845442..000000000
--- a/recipe/cmake_test/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-project(cf_dummy LANGUAGES C CXX)
-cmake_minimum_required(VERSION 3.12)
-find_package(Torch CONFIG REQUIRED)
-find_package(ATen CONFIG REQUIRED)
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index e1c2a2d6b..d5fc48f5d 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,10 +1,7 @@
 # if you wish to build release candidate number X, append the version string with ".rcX"
 {% set version = "2.5.1" %}
-{% set build = 12 %}
+{% set build = 10 %}
 
-# Use a higher build number for the CUDA variant, to ensure that it's
-# preferred by conda's solver, and it's preferentially
-# installed where the platform supports it.
 {% if cuda_compiler_version != "None" %}
 {% set build = build + 200 %}
 {% endif %}
@@ -67,13 +64,6 @@ source:
     - patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
     # point to headers that are now living in $PREFIX/include instead of $SP_DIR/torch/include
     - patches/0016-point-include-paths-to-PREFIX-include.patch
-    - patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
-    - patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
-    - patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch                       # [win]
-    - patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
-    - patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
-    - patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch     # [win]
-    - patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
 
 build:
   number: {{ build }}
@@ -127,7 +117,6 @@ requirements:
     - protobuf
     - make      # [linux]
     - sccache   # [win]
-    - rsync     # [unix]
   host:
     # GPU requirements
     - cudnn                           # [cuda_compiler_version != "None"]
@@ -178,9 +167,6 @@ requirements:
     - libuv
     - pkg-config  # [unix]
     - typing_extensions
-    - pybind11
-    - eigen
-    - zlib
   run:
     # GPU requirements without run_exports
     - {{ pin_compatible('cudnn') }}                       # [cuda_compiler_version != "None"]
@@ -206,18 +192,6 @@ requirements:
 # a particularity of conda-build, that output is defined in
 # the global build stage, including tests
 test:
-  requires:
-    # cmake needs a compiler to run package detection, see
-    # https://discourse.cmake.org/t/questions-about-find-package-cli-msvc/6194
-    - {{ compiler('cxx') }}
-    # for CMake config to find cuda & nvrtc
-    - {{ compiler('cuda') }}    # [cuda_compiler_version != "None"]
-    - cuda-nvrtc-dev            # [cuda_compiler_version != "None"]
-    - cmake
-    - ninja
-    - pkg-config
-  files:
-    - cmake_test/
   commands:
     # libraries; peculiar formatting to avoid linter false positives about selectors
     {% set torch_libs = [
@@ -243,11 +217,6 @@ test:
     - test -f $PREFIX/share/cmake/Torch/TorchConfig.cmake                       # [linux]
     - if not exist %LIBRARY_PREFIX%\share\cmake\Torch\TorchConfig.cmake exit 1  # [win]
 
-    # test integrity of CMake metadata
-    - cd cmake_test
-    - cmake -GNinja -DCMAKE_CXX_STANDARD=17 $CMAKE_ARGS .   # [unix]
-    - cmake -GNinja -DCMAKE_CXX_STANDARD=17 %CMAKE_ARGS% .  # [win]
-
 outputs:
   - name: libtorch
   - name: pytorch
@@ -330,9 +299,6 @@ outputs:
         - pkg-config  # [unix]
         - typing_extensions
         - {{ pin_subpackage('libtorch', exact=True) }}
-        - pybind11
-        - eigen
-        - zlib
       run:
         - llvm-openmp    # [osx]
         - intel-openmp {{ mkl }}  # [win]
@@ -348,7 +314,6 @@ outputs:
         - filelock
         - jinja2
         - networkx
-        - pybind11
         - nomkl                 # [blas_impl != "mkl"]
         - fsspec
         # avoid that people without GPUs needlessly download ~0.5-1GB
@@ -370,8 +335,6 @@ outputs:
       requires:
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
-        # for torch.compile tests
-        - {{ compiler('cuda') }}       # [cuda_compiler_version != "None"]
         - ninja
         - boto3
         - hypothesis
@@ -397,7 +360,6 @@ outputs:
         # tools/ is needed to optimise test run
         # as of pytorch=2.0.0, there is a bug when trying to run tests without the tools
         - tools
-        #- .ci/pytorch/smoke_test/smoke_test.py
       commands:
         # Run pip check so as to ensure that all pytorch packages are installed
         # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/24
@@ -405,14 +367,6 @@ outputs:
         - python -c "import torch; print(torch.__version__)"
         - python -c "import torch; assert torch.backends.mkldnn.m.is_available()"  # [x86 and cuda_compiler_version == "None"]
         - python -c "import torch; torch.tensor(1).to('cpu').numpy(); print('numpy support enabled!!!')"
-        # We have had issues with openmp .dylibs being doubly loaded in certain cases. These two tests catch the (observed) issue
-        - python -c "import torch; import numpy"
-        - python -c "import numpy; import torch"
-        # distributed support is enabled by default on linux; for mac, we enable it manually in build.sh
-        - python -c "import torch; assert torch.distributed.is_available()"        # [linux or osx]
-        - python -c "import torch; assert torch.backends.cuda.is_built()"          # [linux64 and (cuda_compiler_version != "None")]
-        - python -c "import torch; assert torch.backends.cudnn.is_available()"     # [linux64 and (cuda_compiler_version != "None")]
-        - python -c "import torch; assert torch.backends.cudnn.enabled"            # [linux64 and (cuda_compiler_version != "None")]
         # At conda-forge, we target versions of OSX that are too old for MPS support
         # But if users install a newer version of OSX, they will have MPS support
         # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/123#issuecomment-1186355073
@@ -423,31 +377,8 @@ outputs:
         - if not exist %LIBRARY_BIN%\torch_python.dll exit 1  # [win]
         - if not exist %LIBRARY_LIB%\torch_python.lib exit 1  # [win]
 
-        # See here for environment variables needed by the smoke test script
-        # https://github.com/pytorch/pytorch/blob/266fd35c5842902f6304aa8e7713b252cbfb243c/.ci/pytorch/smoke_test/smoke_test.py#L16
-        - set MATRIX_GPU_ARCH_VERSION="{{ '.'.join((cuda_compiler_version or "").split('.')[:2]) }}"   # [(cuda_compiler_version != "None") and (win)]
-        - set MATRIX_GPU_ARCH_TYPE="cuda"                                                       # [(cuda_compiler_version != "None") and (win)]
-        - set MATRIX_GPU_ARCH_VERSION="none"                                                    # [(cuda_compiler_version == "None") and (win)]
-        - set MATRIX_GPU_ARCH_TYPE="none"                                                       # [(cuda_compiler_version == "None") and (win)]
-        - set MATRIX_CHANNEL="defaults"                                                         # [win]
-        - set MATRIX_STABLE_VERSION={{ version }}                                               # [win]
-        - set MATRIX_PACKAGE_TYPE="conda"                                                       # [win]
-        - set TARGET_OS="windows"                                                               # [win]
-        - set OMP_NUM_THREADS=4                                                                 # [win]
-        - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join((cuda_compiler_version or "").split('.')[:2]) }}"  # [(cuda_compiler_version != "None") and (linux and x86_64)]
-        - export MATRIX_GPU_ARCH_TYPE="cuda"                                                    # [(cuda_compiler_version != "None") and (linux and x86_64)]
-        - export MATRIX_GPU_ARCH_VERSION="none"                                                 # [(cuda_compiler_version == "None") and (not win)]
-        - export MATRIX_GPU_ARCH_TYPE="none"                                                    # [(cuda_compiler_version == "None") and (not win)]
-        - export MATRIX_CHANNEL="defaults"                                                      # [not win]
-        - export MATRIX_STABLE_VERSION="{{ version }}"                                          # [not win]
-        - export MATRIX_PACKAGE_TYPE="conda"                                                    # [not win]
-        - export TARGET_OS="linux"                                                              # [linux]
-        - export TARGET_OS="macos-arm64"                                                        # [(osx and arm64)]
-        - export TARGET_OS="macos-x86_64"                                                       # [(osx and x86_64)]
-        - export OMP_NUM_THREADS=4                                                              # [not win]
-        #- python ./smoke_test/smoke_test.py --package torchonly
-
         # a reasonably safe subset of tests that should run under 15 minutes
+        # disable hypothesis because it randomly yields health check errors
         {% set tests = " ".join([
             "test/test_autograd.py",
             "test/test_autograd_fallback.py",
@@ -458,10 +389,8 @@ outputs:
             "test/test_nn.py",
             "test/test_torch.py",
             "test/test_xnnpack_integration.py",
+            "-m \"not hypothesis\"",
         ]) %}
-        # tests torch.compile; avoid on aarch because it adds >4h in test runtime in emulation;
-        # they add a lot of runtime (15->60min on windows), so run them for only one python version
-        {% set tests = tests ~ " test/inductor/test_torchinductor.py" %}    # [py==312 and not aarch64]
 
         {% set skips = "(TestTorch and test_print)" %}
         # tolerance violation with openblas
@@ -483,26 +412,12 @@ outputs:
         # may crash spuriously
         {% set skips = skips ~ " or (TestAutograd and test_profiler_seq_nr)" %}
         {% set skips = skips ~ " or (TestAutograd and test_profiler_propagation)" %}
-        # tests that fail due to resource clean-up issues (non-unique temporary libraries), see
-        # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/318#issuecomment-2620080859
-        {% set skips = skips ~ " or test_mutable_custom_op_fixed_layout" %}             # [cuda_compiler_version != "None"]
         # trivial accuracy problems
         {% set skips = skips ~ " or test_BCELoss_weights_no_reduce_cuda" %}             # [unix and cuda_compiler_version != "None"]
         {% set skips = skips ~ " or test_ctc_loss_cudnn_tensor_cuda " %}                # [unix and cuda_compiler_version != "None"]
         {% set skips = skips ~ " or (TestTorch and test_index_add_correctness)" %}      # [unix and cuda_compiler_version != "None"]
-        # These tests require higher-resource or more recent GPUs than the CI provides
-        {% set skips = skips ~ " or test_sdpa_inference_mode_aot_compile" %}            # [linux and cuda_compiler_version != "None"]
-        {% set skips = skips ~ " or (TestNN and test_grid_sample)" %}                   # [linux and cuda_compiler_version != "None"]
-        # don't mess with tests that rely on GPU failure handling
-        {% set skips = skips ~ " or test_indirect_device_assert" %}                     # [linux and cuda_compiler_version != "None"]
-        # test that fails to find temporary resource
-        {% set skips = skips ~ " or (GPUTests and test_scatter_reduce2)" %}             # [linux and cuda_compiler_version != "None"]
         # MKL problems
-        {% set skips = skips ~ " or (TestLinalgCPU and test_inverse_errors_large_cpu)" %}           # [linux and blas_impl == "mkl" and cuda_compiler_version != "None"]
-        {% set skips = skips ~ " or test_reentrant_parent_error_on_cpu_cuda)" %}                    # [linux and blas_impl == "mkl" and cuda_compiler_version != "None"]
-        # non-MKL problems
-        {% set skips = skips ~ " or test_cross_entropy_loss_2d_out_of_bounds_class_index_cuda" %}   # [linux and blas_impl != "mkl" and cuda_compiler_version != "None"]
-        {% set skips = skips ~ " or test_cublas_config_nondeterministic_alert_cuda " %}             # [linux and blas_impl != "mkl" and cuda_compiler_version != "None"]
+        {% set skips = skips ~ " or (TestLinalgCPU and test_inverse_errors_large_cpu)" %}  # [unix and blas_impl == "mkl" and cuda_compiler_version != "None"]
         # these tests are failing with low -n values
         {% set skips = skips ~ " or test_base_does_not_require_grad_mode_nothing" %}
         {% set skips = skips ~ " or test_base_does_not_require_grad_mode_warn" %}
@@ -523,9 +438,8 @@ outputs:
         # for potential packaging problems by running a fixed subset
         - export OMP_NUM_THREADS=4  # [unix]
         # reduced paralellism to avoid OOM; test only one python version on aarch because emulation is super-slow
-        # disable hypothesis because it randomly yields health check errors
-        - python -m pytest -n 2 {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50   # [unix and (not aarch64 or py==312)]
-        - python -m pytest -v -s {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50  # [win]
+        - python -m pytest -n 2 {{ tests }} -k "not ({{ skips }})" --durations=50   # [unix and (not aarch64 or py==312)]
+        - python -m pytest -v -s {{ tests }} -k "not ({{ skips }})" --durations=50  # [win]
 
         # regression test for https://github.com/conda-forge/pytorch-cpu-feedstock/issues/329, where we picked up
         # duplicate `.pyc` files due to newest py-ver (3.13) in the build environment not matching the one in host;
@@ -565,14 +479,8 @@ about:
   license_file:
     - LICENSE
     - NOTICE
-    - third_party/CMake/Copyright.txt
+    - third_party/pybind11/LICENSE
   summary: PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
-  description: |
-    PyTorch is a Python package that provides two high-level features:
-      - Tensor computation (like NumPy) with strong GPU acceleration
-      - Deep neural networks built on a tape-based autograd system
-    You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed.
-  doc_url: https://pytorch.org/docs/
 
 extra:
   recipe-maintainers:
diff --git a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
index fda50bccf..4ce6492a0 100644
--- a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
+++ b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
@@ -1,14 +1,14 @@
-From f3a0f9aab6dce56eea590b946f60256014b61bf7 Mon Sep 17 00:00:00 2001
+From 756045fca376345e48afb6a868b502dbfa0c584c Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Sun, 1 Sep 2024 17:35:40 -0400
-Subject: [PATCH 01/21] Force usage of python 3 and error without numpy
+Subject: [PATCH 01/16] Force usage of python 3 and error without numpy
 
 ---
  cmake/Dependencies.cmake | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)
 
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index e78305e0a8e..15c62548601 100644
+index e78305e0a..15c625486 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
 @@ -861,9 +861,9 @@ if(BUILD_PYTHON)
@@ -32,3 +32,6 @@ index e78305e0a8e..15c62548601 100644
          caffe2_update_option(USE_NUMPY OFF)
        else()
          caffe2_update_option(USE_NUMPY ON)
+-- 
+2.48.1
+
diff --git a/recipe/patches/0002-Help-find-numpy.patch b/recipe/patches/0002-Help-find-numpy.patch
index d660deda0..6f3fa2c36 100644
--- a/recipe/patches/0002-Help-find-numpy.patch
+++ b/recipe/patches/0002-Help-find-numpy.patch
@@ -1,14 +1,14 @@
-From 21c30036b5b86f403c0cf4426165d9a6a50edb1a Mon Sep 17 00:00:00 2001
+From 70661ad52cb2f0290de3e0758f240560e4b1e769 Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Tue, 1 Oct 2024 00:28:40 -0400
-Subject: [PATCH 02/21] Help find numpy
+Subject: [PATCH 02/16] Help find numpy
 
 ---
  tools/setup_helpers/cmake.py | 6 ++++++
  1 file changed, 6 insertions(+)
 
 diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py
-index 4b605fe5975..bde41323c76 100644
+index 4b605fe59..bde41323c 100644
 --- a/tools/setup_helpers/cmake.py
 +++ b/tools/setup_helpers/cmake.py
 @@ -305,9 +305,15 @@ class CMake:
@@ -27,3 +27,6 @@ index 4b605fe5975..bde41323c76 100644
              TORCH_BUILD_VERSION=version,
              **build_options,
          )
+-- 
+2.48.1
+
diff --git a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
index d44513d49..af8662e42 100644
--- a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
+++ b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
@@ -1,7 +1,7 @@
-From d1826af525db41eda5020a1404f5d5521d67a5dc Mon Sep 17 00:00:00 2001
+From 4ae61d17c81e9d66e091c2790ac6deae6bf31204 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Sat, 19 Oct 2024 04:26:01 +0000
-Subject: [PATCH 03/21] Add USE_SYSTEM_NVTX option (#138287)
+Subject: [PATCH 03/16] Add USE_SYSTEM_NVTX option (#138287)
 
 ## Summary
 
@@ -21,7 +21,7 @@ Approved by: https://github.com/albanD
  3 files changed, 22 insertions(+), 2 deletions(-)
 
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 98593c2de97..ae3c3f2cbd5 100644
+index 98593c2de..ae3c3f2cb 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
 @@ -470,6 +470,7 @@ option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." OFF)
@@ -41,7 +41,7 @@ index 98593c2de97..ae3c3f2cbd5 100644
  
  # /Z7 override option When generating debug symbols, CMake default to use the
 diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
-index afc1bc12abf..152fbdbe6dd 100644
+index afc1bc12a..152fbdbe6 100644
 --- a/cmake/public/cuda.cmake
 +++ b/cmake/public/cuda.cmake
 @@ -170,7 +170,11 @@ else()
@@ -58,7 +58,7 @@ index afc1bc12abf..152fbdbe6dd 100644
  if(nvtx3_FOUND)
    add_library(torch::nvtx3 INTERFACE IMPORTED)
 diff --git a/setup.py b/setup.py
-index 2b0cfa99d71..7174777ed4e 100644
+index 2b0cfa99d..7174777ed 100644
 --- a/setup.py
 +++ b/setup.py
 @@ -183,7 +183,21 @@
@@ -84,3 +84,6 @@ index 2b0cfa99d71..7174777ed4e 100644
  #
  #   USE_MIMALLOC
  #      Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
+-- 
+2.48.1
+
diff --git a/recipe/patches/0004-Update-sympy-version.patch b/recipe/patches/0004-Update-sympy-version.patch
index a73a73994..5dd72f7c1 100644
--- a/recipe/patches/0004-Update-sympy-version.patch
+++ b/recipe/patches/0004-Update-sympy-version.patch
@@ -1,14 +1,14 @@
-From e3219c5fe8834753b0cf9e92be4d1ef1e874f370 Mon Sep 17 00:00:00 2001
+From 2c6db02c01ad080c8dc8ae0b78be2b93099c2ac8 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Thu, 17 Oct 2024 15:04:05 -0700
-Subject: [PATCH 04/21] Update sympy version
+Subject: [PATCH 04/16] Update sympy version
 
 ---
  setup.py | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/setup.py b/setup.py
-index 7174777ed4e..65be34e39b1 100644
+index 7174777ed..65be34e39 100644
 --- a/setup.py
 +++ b/setup.py
 @@ -1158,7 +1158,7 @@ def main():
@@ -20,3 +20,6 @@ index 7174777ed4e..65be34e39b1 100644
          "networkx",
          "jinja2",
          "fsspec",
+-- 
+2.48.1
+
diff --git a/recipe/patches/0005-Fix-duplicate-linker-script.patch b/recipe/patches/0005-Fix-duplicate-linker-script.patch
index 49e6d72b4..7cc824352 100644
--- a/recipe/patches/0005-Fix-duplicate-linker-script.patch
+++ b/recipe/patches/0005-Fix-duplicate-linker-script.patch
@@ -1,14 +1,14 @@
-From 08a1f44fbc81324aa98d720dfb7b87a261923ac2 Mon Sep 17 00:00:00 2001
+From fa5bb8f1acd0195efadc35c8fbb9199be92932d9 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Sun, 3 Nov 2024 01:12:36 -0700
-Subject: [PATCH 05/21] Fix duplicate linker script
+Subject: [PATCH 05/16] Fix duplicate linker script
 
 ---
  setup.py | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/setup.py b/setup.py
-index 65be34e39b1..b0e01e0d1ee 100644
+index 65be34e39..b0e01e0d1 100644
 --- a/setup.py
 +++ b/setup.py
 @@ -1184,7 +1184,9 @@ def main():
@@ -22,3 +22,6 @@ index 65be34e39b1..b0e01e0d1ee 100644
          os.environ["CFLAGS"] = (
              os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
          )
+-- 
+2.48.1
+
diff --git a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
index 99baed0a5..cddb8b68c 100644
--- a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
+++ b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
@@ -1,7 +1,7 @@
-From 15df314a41c69a31c0443254d5552aa1b39d708d Mon Sep 17 00:00:00 2001
+From 6fc695312cd062e13c2482b52ae8d028bd7c043a Mon Sep 17 00:00:00 2001
 From: William Wen <williamwen@meta.com>
 Date: Fri, 13 Sep 2024 13:02:33 -0700
-Subject: [PATCH 06/21] fix 3.13 pickle error in serialization.py (#136034)
+Subject: [PATCH 06/16] fix 3.13 pickle error in serialization.py (#136034)
 
 Error encountered when adding dynamo 3.13 support.
 Pull Request resolved: https://github.com/pytorch/pytorch/pull/136034
@@ -11,7 +11,7 @@ Approved by: https://github.com/albanD
  1 file changed, 12 insertions(+), 4 deletions(-)
 
 diff --git a/torch/serialization.py b/torch/serialization.py
-index d936d31d6f5..d937680c031 100644
+index d936d31d6..d937680c0 100644
 --- a/torch/serialization.py
 +++ b/torch/serialization.py
 @@ -1005,8 +1005,12 @@ def _legacy_save(obj, f, pickle_module, pickle_protocol) -> None:
@@ -44,3 +44,6 @@ index d936d31d6f5..d937680c031 100644
      pickler.dump(obj)
      data_value = data_buf.getvalue()
      zip_file.write_record("data.pkl", data_value, len(data_value))
+-- 
+2.48.1
+
diff --git a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
index ae6a94cd8..b847ba1ae 100644
--- a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
+++ b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
@@ -1,7 +1,7 @@
-From 655f694854c3eafdd631235b60bc6c1b279218ed Mon Sep 17 00:00:00 2001
+From d5c8df70422afa07dc212266d420f923f5887f99 Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Thu, 3 Oct 2024 22:49:56 -0400
-Subject: [PATCH 07/21] Allow users to overwrite ld with environment variables
+Subject: [PATCH 07/16] Allow users to overwrite ld with environment variables
 
 This should help in the case of cross compilation.
 
@@ -11,7 +11,7 @@ xref: https://github.com/conda-forge/pytorch-cpu-feedstock/pull/261
  1 file changed, 3 insertions(+), 2 deletions(-)
 
 diff --git a/tools/setup_helpers/generate_linker_script.py b/tools/setup_helpers/generate_linker_script.py
-index 11c397a9e5f..e66fc197062 100644
+index 11c397a9e..e66fc1970 100644
 --- a/tools/setup_helpers/generate_linker_script.py
 +++ b/tools/setup_helpers/generate_linker_script.py
 @@ -1,3 +1,4 @@
@@ -30,3 +30,6 @@ index 11c397a9e5f..e66fc197062 100644
          "\n"
      )
  
+-- 
+2.48.1
+
diff --git a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
index b52d15880..272d200cf 100644
--- a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
+++ b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
@@ -1,7 +1,7 @@
-From f03bf82d9da9cccb2cf4d4833c1a6349622dc37d Mon Sep 17 00:00:00 2001
+From da7b07f8e3165bf89b08b5a716e539ae9a7afb1a Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
 Date: Wed, 27 Nov 2024 13:47:23 +0100
-Subject: [PATCH 08/21] Allow overriding CUDA-related paths
+Subject: [PATCH 08/16] Allow overriding CUDA-related paths
 
 ---
  cmake/Modules/FindCUDAToolkit.cmake | 2 +-
@@ -9,7 +9,7 @@ Subject: [PATCH 08/21] Allow overriding CUDA-related paths
  2 files changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/cmake/Modules/FindCUDAToolkit.cmake b/cmake/Modules/FindCUDAToolkit.cmake
-index ec9ae530aa6..b7c0bd9fc51 100644
+index ec9ae530a..b7c0bd9fc 100644
 --- a/cmake/Modules/FindCUDAToolkit.cmake
 +++ b/cmake/Modules/FindCUDAToolkit.cmake
 @@ -497,7 +497,7 @@ Result variables
@@ -22,7 +22,7 @@ index ec9ae530aa6..b7c0bd9fc51 100644
    set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}")
    set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}")
 diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py
-index bde41323c76..b171837cd4a 100644
+index bde41323c..b171837cd 100644
 --- a/tools/setup_helpers/cmake.py
 +++ b/tools/setup_helpers/cmake.py
 @@ -252,7 +252,7 @@ class CMake:
@@ -34,3 +34,6 @@ index bde41323c76..b171837cd4a 100644
                  ("EXITCODE", "EXITCODE__TRYRUN_OUTPUT")
              ):
                  build_options[var] = val
+-- 
+2.48.1
+
diff --git a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
index 7d9d1ab50..e1befef6a 100644
--- a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
+++ b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
@@ -1,7 +1,7 @@
-From 4b1faf6ba142953ce2730766db44f8d98d161ef0 Mon Sep 17 00:00:00 2001
+From 3429795de33cac2e508397dd2d9f5f5c96f185c3 Mon Sep 17 00:00:00 2001
 From: Haifeng Jin <haifeng-jin@users.noreply.github.com>
 Date: Tue, 1 Oct 2024 07:53:24 +0000
-Subject: [PATCH 09/21] Fix test/test_linalg.py for NumPy 2 (#136800)
+Subject: [PATCH 09/16] Fix test/test_linalg.py for NumPy 2 (#136800)
 
 Related to  #107302.
 
@@ -36,7 +36,7 @@ Approved by: https://github.com/lezcano
  1 file changed, 12 insertions(+), 3 deletions(-)
 
 diff --git a/test/test_linalg.py b/test/test_linalg.py
-index e9ec874d695..060bccef2e5 100644
+index e9ec874d6..060bccef2 100644
 --- a/test/test_linalg.py
 +++ b/test/test_linalg.py
 @@ -2351,7 +2351,7 @@ class TestLinalg(TestCase):
@@ -75,3 +75,6 @@ index e9ec874d695..060bccef2e5 100644
                      reflectors_i[:] = reflectors_tmp.T
                  reflectors = reflectors.view(*A_cpu.shape)
                  tau = tau.view(tau_shape)
+-- 
+2.48.1
+
diff --git a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
index c28fe93aa..bd5aa553e 100644
--- a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
+++ b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
@@ -1,7 +1,7 @@
-From 032b9be9ca7f9ae174e75554cecc82600ea3ef54 Mon Sep 17 00:00:00 2001
+From a8ddbe6b682347fdc86c5052b244df4f95b926ac Mon Sep 17 00:00:00 2001
 From: Haifeng Jin <haifeng-jin@users.noreply.github.com>
 Date: Sat, 12 Oct 2024 02:40:17 +0000
-Subject: [PATCH 10/21] Fixes NumPy 2 test failures in test_torch.py (#137740)
+Subject: [PATCH 10/16] Fixes NumPy 2 test failures in test_torch.py (#137740)
 
 Related to #107302
 
@@ -24,7 +24,7 @@ Approved by: https://github.com/ezyang
  1 file changed, 4 insertions(+), 4 deletions(-)
 
 diff --git a/test/test_torch.py b/test/test_torch.py
-index be4d6180819..c6fd6ac9f19 100644
+index be4d61808..c6fd6ac9f 100644
 --- a/test/test_torch.py
 +++ b/test/test_torch.py
 @@ -2891,7 +2891,7 @@ else:
@@ -58,3 +58,6 @@ index be4d6180819..c6fd6ac9f19 100644
          )
  
      @skipIfTorchDynamo("np.float64 restored as float32 after graph break.")
+-- 
+2.48.1
+
diff --git a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
index c7b201b6b..2d9b19958 100644
--- a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
+++ b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
@@ -1,7 +1,7 @@
-From 56f1528fa072023fb2724d5abf8790f2f6cc3aaa Mon Sep 17 00:00:00 2001
+From 113c9ebec11cba2f1d43bfd4ac03eb02c5c921a8 Mon Sep 17 00:00:00 2001
 From: Isuru Fernando <ifernando@quansight.com>
 Date: Wed, 18 Dec 2024 03:59:00 +0000
-Subject: [PATCH 11/21] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
+Subject: [PATCH 11/16] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
 
 There are two calling conventions for *dotu functions
 
@@ -31,7 +31,7 @@ functional calls.
  1 file changed, 1 insertion(+)
 
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 15c62548601..3965416eb29 100644
+index 15c625486..3965416eb 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
 @@ -182,6 +182,7 @@ elseif(BLAS STREQUAL "OpenBLAS")
@@ -42,3 +42,6 @@ index 15c62548601..3965416eb29 100644
  elseif(BLAS STREQUAL "BLIS")
    find_package(BLIS REQUIRED)
    include_directories(SYSTEM ${BLIS_INCLUDE_DIR})
+-- 
+2.48.1
+
diff --git a/recipe/patches/0012-fix-issue-142484.patch b/recipe/patches/0012-fix-issue-142484.patch
index db13f7aca..bb4a2e6e5 100644
--- a/recipe/patches/0012-fix-issue-142484.patch
+++ b/recipe/patches/0012-fix-issue-142484.patch
@@ -1,7 +1,7 @@
-From beba58d724cc1bd7ca73660b0a5ad9e61ae0c562 Mon Sep 17 00:00:00 2001
+From 323bb15a6b1f601d79211bd292c26cb886a5d60e Mon Sep 17 00:00:00 2001
 From: "Zheng, Zhaoqiong" <zhaoqiong.zheng@intel.com>
 Date: Fri, 27 Dec 2024 13:49:36 +0800
-Subject: [PATCH 12/21] fix issue 142484
+Subject: [PATCH 12/16] fix issue 142484
 
 From https://github.com/pytorch/pytorch/pull/143894
 ---
@@ -9,7 +9,7 @@ From https://github.com/pytorch/pytorch/pull/143894
  1 file changed, 11 insertions(+), 1 deletion(-)
 
 diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
-index e26cfbf6d8e..c61b76d3205 100644
+index e26cfbf6d..c61b76d32 100644
 --- a/aten/src/ATen/native/mkl/SpectralOps.cpp
 +++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
 @@ -477,7 +477,17 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
@@ -31,3 +31,6 @@ index e26cfbf6d8e..c61b76d3205 100644
    auto descriptor = _plan_mkl_fft(
        input.strides(), out.strides(), signal_size, input.is_complex(),
        out.is_complex(), normalization, forward, value_type);
+-- 
+2.48.1
+
diff --git a/recipe/patches/0013-Fix-FindOpenBLAS.patch b/recipe/patches/0013-Fix-FindOpenBLAS.patch
index 49d43f905..47e348855 100644
--- a/recipe/patches/0013-Fix-FindOpenBLAS.patch
+++ b/recipe/patches/0013-Fix-FindOpenBLAS.patch
@@ -1,14 +1,14 @@
-From 816a248a4425a97350959e412666e6db9012a52e Mon Sep 17 00:00:00 2001
+From 4ca7ade3211380629ab56f3c965edd1b6387d1e0 Mon Sep 17 00:00:00 2001
 From: Bas Zalmstra <bas@prefix.dev>
 Date: Thu, 16 May 2024 10:46:49 +0200
-Subject: [PATCH 13/21] Fix FindOpenBLAS
+Subject: [PATCH 13/16] Fix FindOpenBLAS
 
 ---
  cmake/Modules/FindOpenBLAS.cmake | 15 +++++++++------
  1 file changed, 9 insertions(+), 6 deletions(-)
 
 diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake
-index 69d8227aea5..0d12185c799 100644
+index 69d8227ae..0d12185c7 100644
 --- a/cmake/Modules/FindOpenBLAS.cmake
 +++ b/cmake/Modules/FindOpenBLAS.cmake
 @@ -31,22 +31,25 @@ SET(Open_BLAS_LIB_SEARCH_PATHS
@@ -43,3 +43,6 @@ index 69d8227aea5..0d12185c799 100644
  
  IF (OpenBLAS_FOUND)
    IF (NOT OpenBLAS_FIND_QUIETLY)
+-- 
+2.48.1
+
diff --git a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
index af8083761..031fce6d8 100644
--- a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
+++ b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
@@ -1,7 +1,7 @@
-From db896f927403f55a18f931b18a6469cb4e37d322 Mon Sep 17 00:00:00 2001
+From 3b32a078793f06e80d88c356871953f254d4d6c3 Mon Sep 17 00:00:00 2001
 From: atalman <atalman@fb.com>
 Date: Tue, 12 Nov 2024 12:28:10 +0000
-Subject: [PATCH 14/21] CD Enable Python 3.13 on windows (#138095)
+Subject: [PATCH 14/16] CD Enable Python 3.13 on windows (#138095)
 
 Adding CD windows. Part of: https://github.com/pytorch/pytorch/issues/130249
 Builder PR landed with smoke test: https://github.com/pytorch/builder/pull/2035
@@ -16,7 +16,7 @@ Cherry-pick-note: minus changes in `.github/*`
  2 files changed, 13 insertions(+), 1 deletion(-)
 
 diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
-index 722618efbb0..f98818bfdcc 100644
+index 722618efb..f98818bfd 100644
 --- a/functorch/csrc/dim/dim.cpp
 +++ b/functorch/csrc/dim/dim.cpp
 @@ -38,6 +38,7 @@ PyObject* Dim_init() {
@@ -28,7 +28,7 @@ index 722618efbb0..f98818bfdcc 100644
  #include "internal/pycore_opcode.h"
  #undef Py_BUILD_CORE
 diff --git a/functorch/csrc/dim/dim_opcode.c b/functorch/csrc/dim/dim_opcode.c
-index 81ba62a3781..1b5d0677344 100644
+index 81ba62a37..1b5d06773 100644
 --- a/functorch/csrc/dim/dim_opcode.c
 +++ b/functorch/csrc/dim/dim_opcode.c
 @@ -1,6 +1,17 @@
@@ -50,3 +50,6 @@ index 81ba62a3781..1b5d0677344 100644
 +#undef NEED_OPCODE_TABLES
 +#undef Py_BUILD_CORE
 +#endif
+-- 
+2.48.1
+
diff --git a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
index 6cf5ea9c5..e8ff9e592 100644
--- a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
+++ b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
@@ -1,7 +1,7 @@
-From 33790dfbf966e7d8ea4ff6798d2ff92474d84079 Mon Sep 17 00:00:00 2001
+From 4465b713563855e7eb5475758226f3a90f675f55 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 23 Jan 2025 22:46:58 +1100
-Subject: [PATCH 15/21] simplify torch.utils.cpp_extension.include_paths; use
+Subject: [PATCH 15/16] simplify torch.utils.cpp_extension.include_paths; use
  it in cpp_builder
 
 The /TH headers have not existed since pytorch 1.11
@@ -11,7 +11,7 @@ The /TH headers have not existed since pytorch 1.11
  2 files changed, 3 insertions(+), 14 deletions(-)
 
 diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py
-index 95a0bff86fd..860e7fb062f 100644
+index 95a0bff86..860e7fb06 100644
 --- a/torch/_inductor/cpp_builder.py
 +++ b/torch/_inductor/cpp_builder.py
 @@ -743,16 +743,9 @@ def _get_build_args_of_chosen_isa(vec_isa: VecISA) -> Tuple[List[str], List[str]
@@ -35,7 +35,7 @@ index 95a0bff86fd..860e7fb062f 100644
      libraries = []
      if sys.platform != "darwin" and not config.is_fbcode():
 diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
-index aaa45ea4c90..3f584ef5598 100644
+index aaa45ea4c..3f584ef55 100644
 --- a/torch/utils/cpp_extension.py
 +++ b/torch/utils/cpp_extension.py
 @@ -1159,10 +1159,6 @@ def include_paths(cuda: bool = False) -> List[str]:
@@ -49,3 +49,6 @@ index aaa45ea4c90..3f584ef5598 100644
      ]
      if cuda and IS_HIP_EXTENSION:
          paths.append(os.path.join(lib_include, 'THH'))
+-- 
+2.48.1
+
diff --git a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
index ed6b74f62..fecf4d0fc 100644
--- a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
+++ b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
@@ -1,14 +1,14 @@
-From 799f6fa59dac93dabbbcf72d46f4e1334e3d65d9 Mon Sep 17 00:00:00 2001
+From 4d485fc0a5e3226e528e9dab17b184ff9835a045 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 23 Jan 2025 22:58:14 +1100
-Subject: [PATCH 16/21] point include paths to $PREFIX/include
+Subject: [PATCH 16/16] point include paths to $PREFIX/include
 
 ---
  torch/utils/cpp_extension.py | 9 +++++++++
  1 file changed, 9 insertions(+)
 
 diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
-index 3f584ef5598..4210f62b6db 100644
+index 3f584ef55..4210f62b6 100644
 --- a/torch/utils/cpp_extension.py
 +++ b/torch/utils/cpp_extension.py
 @@ -1155,10 +1155,19 @@ def include_paths(cuda: bool = False) -> List[str]:
@@ -31,3 +31,6 @@ index 3f584ef5598..4210f62b6db 100644
      ]
      if cuda and IS_HIP_EXTENSION:
          paths.append(os.path.join(lib_include, 'THH'))
+-- 
+2.48.1
+
diff --git a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
deleted file mode 100644
index aff55f95d..000000000
--- a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 9f73a02bacf9680833ac64657fde6762d33ab200 Mon Sep 17 00:00:00 2001
-From: Daniel Petry <dpetry@anaconda.com>
-Date: Tue, 21 Jan 2025 17:45:23 -0600
-Subject: [PATCH 17/21] Add conda prefix to inductor include paths
-
-Currently inductor doesn't look in conda's includes and libs. This results in
-errors when it tries to compile, if system versions are being used of
-dependencies (e.g., sleef).
-
-Note that this is for inductor's JIT mode, not its AOT mode, for which the
-end user provides a <filename>_compile_flags.json file.
----
- torch/_inductor/cpp_builder.py | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py
-index 860e7fb062f..76c61375d91 100644
---- a/torch/_inductor/cpp_builder.py
-+++ b/torch/_inductor/cpp_builder.py
-@@ -1048,6 +1048,7 @@ def get_cpp_torch_options(
-         + python_include_dirs
-         + torch_include_dirs
-         + omp_include_dir_paths
-+        + [os.getenv('CONDA_PREFIX') + '/include']
-     )
-     cflags = sys_libs_cflags + omp_cflags
-     ldflags = omp_ldflags
diff --git a/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch b/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
deleted file mode 100644
index 426e6015d..000000000
--- a/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From b0cfa0f728e96a3a9d6f7434e2c02d74d6daa9a9 Mon Sep 17 00:00:00 2001
-From: "H. Vetinari" <h.vetinari@gmx.com>
-Date: Tue, 28 Jan 2025 14:15:34 +1100
-Subject: [PATCH 18/21] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX
-
-we cannot set CMAKE_INSTALL_PREFIX without the pytorch build complaining, but we can
-use TORCH_INSTALL_PREFIX, which is set correctly relative to our CMake files already:
-https://github.com/pytorch/pytorch/blob/v2.5.1/cmake/TorchConfig.cmake.in#L47
----
- aten/src/ATen/CMakeLists.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 6d9152a4d07..aa4dd7b05cc 100644
---- a/aten/src/ATen/CMakeLists.txt
-+++ b/aten/src/ATen/CMakeLists.txt
-@@ -563,7 +563,7 @@ if(USE_ROCM)
-   # list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
- endif()
- 
--set(ATEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}")
-+set(ATEN_INCLUDE_DIR "${TORCH_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}")
- configure_file(ATenConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake")
- install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake"
-   DESTINATION "${AT_INSTALL_SHARE_DIR}/cmake/ATen")
diff --git a/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch b/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
deleted file mode 100644
index 789509862..000000000
--- a/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
+++ /dev/null
@@ -1,158 +0,0 @@
-From f7db4cbfb0af59027ed8bdcd0387dba6fbcb1192 Mon Sep 17 00:00:00 2001
-From: "H. Vetinari" <h.vetinari@gmx.com>
-Date: Tue, 28 Jan 2025 10:58:29 +1100
-Subject: [PATCH 19/21] remove `DESTINATION lib` from CMake `install(TARGETS`
- directives
-
-Suggested-By: Silvio Traversaro <silvio@traversaro.it>
----
- c10/CMakeLists.txt                      |  2 +-
- c10/cuda/CMakeLists.txt                 |  2 +-
- c10/hip/CMakeLists.txt                  |  2 +-
- c10/xpu/CMakeLists.txt                  |  2 +-
- caffe2/CMakeLists.txt                   | 18 +++++++++---------
- torch/CMakeLists.txt                    |  2 +-
- torch/lib/libshm_windows/CMakeLists.txt |  2 +-
- 7 files changed, 15 insertions(+), 15 deletions(-)
-
-diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
-index 80e172497d5..d7f8987020d 100644
---- a/c10/CMakeLists.txt
-+++ b/c10/CMakeLists.txt
-@@ -162,7 +162,7 @@ if(NOT BUILD_LIBTORCHLESS)
-   # Note: for now, we will put all export path into one single Caffe2Targets group
-   # to deal with the cmake deployment need. Inside the Caffe2Targets set, the
-   # individual libraries like libc10.so and libcaffe2.so are still self-contained.
--  install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib)
-+  install(TARGETS c10 EXPORT Caffe2Targets)
- endif()
- 
- install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
-diff --git a/c10/cuda/CMakeLists.txt b/c10/cuda/CMakeLists.txt
-index 3327dab4779..9336c9e8f77 100644
---- a/c10/cuda/CMakeLists.txt
-+++ b/c10/cuda/CMakeLists.txt
-@@ -82,7 +82,7 @@ if(NOT BUILD_LIBTORCHLESS)
- # Note: for now, we will put all export path into one single Caffe2Targets group
- # to deal with the cmake deployment need. Inside the Caffe2Targets set, the
- # individual libraries like libc10.so and libcaffe2.so are still self-contained.
--install(TARGETS c10_cuda EXPORT Caffe2Targets DESTINATION lib)
-+install(TARGETS c10_cuda EXPORT Caffe2Targets)
- 
- endif()
- 
-diff --git a/c10/hip/CMakeLists.txt b/c10/hip/CMakeLists.txt
-index f153030e793..514c6d29266 100644
---- a/c10/hip/CMakeLists.txt
-+++ b/c10/hip/CMakeLists.txt
-@@ -55,7 +55,7 @@ if(NOT BUILD_LIBTORCHLESS)
-       $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
-       $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
-       $<INSTALL_INTERFACE:include>)
--  install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib)
-+  install(TARGETS c10_hip EXPORT Caffe2Targets)
-   set(C10_HIP_LIB c10_hip)
- endif()
- 
-diff --git a/c10/xpu/CMakeLists.txt b/c10/xpu/CMakeLists.txt
-index 01f77d61713..437ade657f9 100644
---- a/c10/xpu/CMakeLists.txt
-+++ b/c10/xpu/CMakeLists.txt
-@@ -45,7 +45,7 @@ if(NOT BUILD_LIBTORCHLESS)
-       $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
-       $<INSTALL_INTERFACE:include>
-       )
--  install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
-+  install(TARGETS c10_xpu EXPORT Caffe2Targets)
-   set(C10_XPU_LIB c10_xpu)
-   add_subdirectory(test)
- endif()
-diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 9be7f3732f3..b51c7cc637b 100644
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -549,7 +549,7 @@ if(USE_CUDA)
-   endif()
- 
-   target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS})
--  install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+  install(TARGETS caffe2_nvrtc)
-   if(USE_NCCL)
-     list(APPEND Caffe2_GPU_SRCS
-       ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
-@@ -609,7 +609,7 @@ if(USE_ROCM)
-   target_link_libraries(caffe2_nvrtc ${PYTORCH_HIP_LIBRARIES} ${ROCM_HIPRTC_LIB})
-   target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR})
-   target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__)
--  install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+  install(TARGETS caffe2_nvrtc)
- endif()
- 
- if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER)
-@@ -995,7 +995,7 @@ elseif(USE_CUDA)
-           CUDA::culibos ${CMAKE_DL_LIBS})
-     endif()
-     set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG")
--    install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+    install(TARGETS torch_cuda_linalg)
-   endif()
- 
-   if(USE_PRECOMPILED_HEADERS)
-@@ -1467,17 +1467,17 @@ endif()
- 
- caffe2_interface_library(torch torch_library)
- 
--install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets)
- 
- if(USE_CUDA)
--  install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+  install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets)
- elseif(USE_ROCM)
--  install(TARGETS torch_hip torch_hip_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+  install(TARGETS torch_hip torch_hip_library EXPORT Caffe2Targets)
- elseif(USE_XPU)
--  install(TARGETS torch_xpu torch_xpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+  install(TARGETS torch_xpu torch_xpu_library EXPORT Caffe2Targets)
- endif()
- 
--install(TARGETS torch torch_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+install(TARGETS torch torch_library EXPORT Caffe2Targets)
- 
- target_link_libraries(torch PUBLIC torch_cpu_library)
- 
-@@ -1616,7 +1616,7 @@ if(BUILD_SHARED_LIBS)
-       target_link_libraries(torch_global_deps torch::nvtoolsext)
-     endif()
-   endif()
--  install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+  install(TARGETS torch_global_deps)
- endif()
- 
- # ---[ Caffe2 HIP sources.
-diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
-index c74b45431c9..80fb5e7734e 100644
---- a/torch/CMakeLists.txt
-+++ b/torch/CMakeLists.txt
-@@ -447,7 +447,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "")
-     set_target_properties(torch_python PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS})
- endif()
- 
--install(TARGETS torch_python DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-+install(TARGETS torch_python)
- 
- # Generate torch/version.py from the appropriate CMake cache variables.
- if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
-diff --git a/torch/lib/libshm_windows/CMakeLists.txt b/torch/lib/libshm_windows/CMakeLists.txt
-index df2a1064938..5fa15e6be31 100644
---- a/torch/lib/libshm_windows/CMakeLists.txt
-+++ b/torch/lib/libshm_windows/CMakeLists.txt
-@@ -19,7 +19,7 @@ target_include_directories(shm PRIVATE
- target_link_libraries(shm torch c10)
- 
- 
--install(TARGETS shm DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}")
-+install(TARGETS shm)
- install(FILES libshm.h DESTINATION "include")
- 
- if(MSVC AND BUILD_SHARED_LIBS)
diff --git a/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch b/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
deleted file mode 100644
index 17c54e337..000000000
--- a/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 39041f5a78068d2cf58d99f76938aee95a3c7bb5 Mon Sep 17 00:00:00 2001
-From: "H. Vetinari" <h.vetinari@gmx.com>
-Date: Thu, 30 Jan 2025 13:23:14 +1100
-Subject: [PATCH 20/21] make library name in
- `test_mutable_custom_op_fixed_layout{,2}` unique
-
-Suggested-By: Daniel Petry <dpetry@anaconda.com>
----
- test/inductor/test_torchinductor.py | 14 +++++++++-----
- 1 file changed, 9 insertions(+), 5 deletions(-)
-
-diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py
-index 610f5d27332..99e2169febb 100644
---- a/test/inductor/test_torchinductor.py
-+++ b/test/inductor/test_torchinductor.py
-@@ -10628,7 +10628,8 @@ class CommonTemplate:
-     @requires_gpu()
-     @config.patch(implicit_fallbacks=True)
-     def test_mutable_custom_op_fixed_layout2(self):
--        with torch.library._scoped_library("mylib", "DEF") as lib:
-+        unique_lib_name = f"mylib_{id(self)}"  # Make unique name using test instance id
-+        with torch.library._scoped_library(unique_lib_name, "DEF") as lib:
-             mod = nn.Conv2d(3, 128, 1, stride=1, bias=False).to(device=GPU_TYPE)
-             inp = torch.rand(2, 3, 128, 128, device=GPU_TYPE)
-             expected_stride = mod(inp).clone().stride()
-@@ -10664,8 +10665,9 @@ class CommonTemplate:
-             def fn(x):
-                 # Inductor changes the conv to be channels-last
-                 z = mod(x)
--                output = torch.ops.mylib.bar(z, torch._dynamo.is_compiling())
--                torch.ops.mylib.add_one(output)
-+                mylib = importlib.import_module(f"torch.ops.{unique_lib_name}")
-+                output = mylib.bar(z, torch._dynamo.is_compiling())
-+                mylib.add_one(output)
-                 return output**2
- 
-             with torch.no_grad():
-@@ -10681,7 +10683,8 @@ class CommonTemplate:
- 
-     @config.patch(implicit_fallbacks=True)
-     def test_mutable_custom_op_fixed_layout(self):
--        with torch.library._scoped_library("mylib", "DEF") as lib:
-+        unique_lib_name = f"mylib_{id(self)}"  # Make unique name using test instance id
-+        with torch.library._scoped_library(unique_lib_name, "DEF") as lib:
-             lib.define(
-                 "copy_(Tensor(a!) dst, Tensor src) -> ()",
-                 tags=torch.Tag.needs_fixed_stride_order,
-@@ -10697,7 +10700,8 @@ class CommonTemplate:
- 
-             def f(x):
-                 full_default_3 = torch.full([3], 7.0, device="cpu")
--                chunk_cat_default_1 = torch.ops.mylib.copy_.default(full_default_3, x)
-+                mylib = importlib.import_module(f"torch.ops.{unique_lib_name}")
-+                chunk_cat_default_1 = mylib.copy_.default(full_default_3, x)
-                 mul_out = torch.mul(full_default_3, full_default_3)
-                 return mul_out
- 
diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
deleted file mode 100644
index a6f17c5d2..000000000
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ /dev/null
@@ -1,367 +0,0 @@
-From 1780879024ea952f8591aa175a9787f93e697368 Mon Sep 17 00:00:00 2001
-From: "H. Vetinari" <h.vetinari@gmx.com>
-Date: Thu, 30 Jan 2025 08:33:44 +1100
-Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
- metadata
-
-vendor the not-available-anymore function torch_cuda_get_nvcc_gencode_flag from CMake
----
- caffe2/CMakeLists.txt      |  14 ++--
- cmake/Summary.cmake        |  10 +--
- cmake/TorchConfig.cmake.in |   2 +-
- cmake/public/cuda.cmake    |  48 ++++----------
- cmake/public/utils.cmake   | 127 +++++++++++++++++++++++++++++++++++++
- setup.py                   |   2 +-
- 6 files changed, 153 insertions(+), 50 deletions(-)
-
-diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index b51c7cc637b..6e107b5b02a 100644
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -906,25 +906,25 @@ if(USE_ROCM)
-         "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
-   endif()
- elseif(USE_CUDA)
--  set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
-+  set(CUDAToolkit_LINK_LIBRARIES_KEYWORD PRIVATE)
-   list(APPEND Caffe2_GPU_SRCS ${GENERATED_CXX_TORCH_CUDA})
--  if(CUDA_SEPARABLE_COMPILATION)
-+  if(CUDAToolkit_SEPARABLE_COMPILATION)
-     # Separate compilation fails when kernels using `thrust::sort_by_key`
-     # are linked with the rest of CUDA code. Workaround by linking them separately.
-     add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_CU_SRCS})
--    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
-+    set_property(TARGET torch_cuda PROPERTY CUDAToolkit_SEPARABLE_COMPILATION ON)
- 
-     add_library(torch_cuda_w_sort_by_key OBJECT
-         ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
-         ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
--    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
-+    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDAToolkit_SEPARABLE_COMPILATION OFF)
-     target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
-   else()
-     add_library(torch_cuda
-         ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
-         ${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
-   endif()
--  set(CUDA_LINK_LIBRARIES_KEYWORD)
-+  set(CUDAToolkit_LINK_LIBRARIES_KEYWORD)
-   torch_compile_options(torch_cuda)  # see cmake/public/utils.cmake
-   target_compile_definitions(torch_cuda PRIVATE USE_CUDA)
- 
-@@ -973,12 +973,12 @@ elseif(USE_CUDA)
-         torch_cuda
-     )
-     if($ENV{ATEN_STATIC_CUDA})
--      if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
-+      if(CUDAToolkit_VERSION_MAJOR LESS_EQUAL 11)
-         target_link_libraries(torch_cuda_linalg PRIVATE
-             CUDA::cusolver_static
-             ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a     # needed for libcusolver_static
-         )
--      elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
-+      elseif(CUDAToolkit_VERSION_MAJOR GREATER_EQUAL 12)
-         target_link_libraries(torch_cuda_linalg PRIVATE
-             CUDA::cusolver_static
-             ${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a     # needed for libcusolver_static
-diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
-index d51c451589c..154f04a89dd 100644
---- a/cmake/Summary.cmake
-+++ b/cmake/Summary.cmake
-@@ -76,7 +76,7 @@ function(caffe2_print_configuration_summary)
-     message(STATUS "    USE_CUSPARSELT      : ${USE_CUSPARSELT}")
-     message(STATUS "    USE_CUDSS           : ${USE_CUDSS}")
-     message(STATUS "    USE_CUFILE          : ${USE_CUFILE}")
--    message(STATUS "    CUDA version        : ${CUDA_VERSION}")
-+    message(STATUS "    CUDA version        : ${CUDAToolkit_VERSION}")
-     message(STATUS "    USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}")
-     message(STATUS "    USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}")
-     if(${USE_CUDNN})
-@@ -88,7 +88,7 @@ function(caffe2_print_configuration_summary)
-     if(${USE_CUFILE})
-       message(STATUS "    cufile library    : ${CUDA_cuFile_LIBRARY}")
-     endif()
--    message(STATUS "    CUDA root directory : ${CUDA_TOOLKIT_ROOT_DIR}")
-+    message(STATUS "    CUDA root directory : ${CUDAToolkit_ROOT}")
-     message(STATUS "    CUDA library        : ${CUDA_cuda_driver_LIBRARY}")
-     message(STATUS "    cudart library      : ${CUDA_cudart_LIBRARY}")
-     message(STATUS "    cublas library      : ${CUDA_cublas_LIBRARY}")
-@@ -108,12 +108,12 @@ function(caffe2_print_configuration_summary)
-       message(STATUS "    cuDSS library       : ${__tmp}")
-     endif()
-     message(STATUS "    nvrtc               : ${CUDA_nvrtc_LIBRARY}")
--    message(STATUS "    CUDA include path   : ${CUDA_INCLUDE_DIRS}")
--    message(STATUS "    NVCC executable     : ${CUDA_NVCC_EXECUTABLE}")
-+    message(STATUS "    CUDA include path   : ${CUDATookit_INCLUDE_DIRS}")
-+    message(STATUS "    NVCC executable     : ${CUDATookit_NVCC_EXECUTABLE}")
-     message(STATUS "    CUDA compiler       : ${CMAKE_CUDA_COMPILER}")
-     message(STATUS "    CUDA flags          : ${CMAKE_CUDA_FLAGS}")
-     message(STATUS "    CUDA host compiler  : ${CMAKE_CUDA_HOST_COMPILER}")
--    message(STATUS "    CUDA --device-c     : ${CUDA_SEPARABLE_COMPILATION}")
-+    message(STATUS "    CUDA --device-c     : ${CUDATookit_SEPARABLE_COMPILATION}")
-     message(STATUS "    USE_TENSORRT        : ${USE_TENSORRT}")
-     if(${USE_TENSORRT})
-       message(STATUS "      TensorRT runtime library: ${TENSORRT_LIBRARY}")
-diff --git a/cmake/TorchConfig.cmake.in b/cmake/TorchConfig.cmake.in
-index cba4d929855..da904fc6a18 100644
---- a/cmake/TorchConfig.cmake.in
-+++ b/cmake/TorchConfig.cmake.in
-@@ -125,7 +125,7 @@ if(@USE_CUDA@)
-     find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib")
-     list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY})
-   else()
--    set(TORCH_CUDA_LIBRARIES ${CUDA_NVRTC_LIB})
-+    set(TORCH_CUDA_LIBRARIES CUDA::nvrtc)
-   endif()
-   if(TARGET torch::nvtoolsext)
-     list(APPEND TORCH_CUDA_LIBRARIES torch::nvtoolsext)
-diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
-index 152fbdbe6dd..0d1aeffc59f 100644
---- a/cmake/public/cuda.cmake
-+++ b/cmake/public/cuda.cmake
-@@ -26,8 +26,8 @@ if(NOT MSVC)
- endif()
- 
- # Find CUDA.
--find_package(CUDA)
--if(NOT CUDA_FOUND)
-+find_package(CUDAToolkit)
-+if(NOT CUDAToolkit_FOUND)
-   message(WARNING
-     "Caffe2: CUDA cannot be found. Depending on whether you are building "
-     "Caffe2 or a Caffe2 dependent library, the next warning / error will "
-@@ -36,8 +36,6 @@ if(NOT CUDA_FOUND)
-   return()
- endif()
- 
--# Enable CUDA language support
--set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
- # Pass clang as host compiler, which according to the docs
- # Must be done before CUDA language is enabled, see
- # https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
-@@ -56,24 +54,18 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0)
-   cmake_policy(SET CMP0074 NEW)
- endif()
- 
--find_package(CUDAToolkit REQUIRED)
-+find_package(CUDAToolkit REQUIRED COMPONENTS cudart nvrtc REQUIRED)
- 
- cmake_policy(POP)
- 
--if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
--  message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
--                      "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
--                      "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
--endif()
--
--message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
--message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
--message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
--if(CUDA_VERSION VERSION_LESS 11.0)
-+message(STATUS "Caffe2: CUDA detected: " ${CUDAToolkit_VERSION})
-+message(STATUS "Caffe2: CUDA nvcc is: " ${CUDAToolkit_NVCC_EXECUTABLE})
-+message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDAToolkit_ROOT})
-+if(CUDAToolkit_VERSION VERSION_LESS 11.0)
-   message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
- endif()
- 
--if(CUDA_FOUND)
-+if(CUDAToolkit_FOUND)
-   # Sometimes, we may mismatch nvcc with the CUDA headers we are
-   # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
-   # but the PATH is not consistent with CUDA_HOME.  It's better safe
-@@ -97,8 +89,8 @@ if(CUDA_FOUND)
-     )
-   if(NOT CMAKE_CROSSCOMPILING)
-     try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
--      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
--      LINK_LIBRARIES ${CUDA_LIBRARIES}
-+      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDAToolkit_INCLUDE_DIRS}"
-+      LINK_LIBRARIES ${CUDAToolkit_LIBRARIES}
-       RUN_OUTPUT_VARIABLE cuda_version_from_header
-       COMPILE_OUTPUT_VARIABLE output_var
-       )
-@@ -106,30 +98,14 @@ if(CUDA_FOUND)
-       message(FATAL_ERROR "Caffe2: Couldn't determine version from header: " ${output_var})
-     endif()
-     message(STATUS "Caffe2: Header version is: " ${cuda_version_from_header})
--    if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING})
--      # Force CUDA to be processed for again next time
--      # TODO: I'm not sure if this counts as an implementation detail of
--      # FindCUDA
--      set(${cuda_version_from_findcuda} ${CUDA_VERSION_STRING})
--      unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
--      # Not strictly necessary, but for good luck.
--      unset(CUDA_VERSION CACHE)
--      # Error out
--      message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
--        "but the CUDA headers say the version is ${cuda_version_from_header}.  This often occurs "
--        "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
--        "non-standard locations, without also setting PATH to point to the correct nvcc.  "
--        "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH.  "
--        "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
--    endif()
-   endif()
- endif()
- 
- # ---[ CUDA libraries wrapper
- 
- # find lbnvrtc.so
--set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
--if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
-+get_target_property(CUDA_NVRTC_LIB CUDA::nvrtc INTERFACE_LINK_LIBRARIES)
-+if(NOT CUDA_NVRTC_SHORTHASH)
-   find_package(Python COMPONENTS Interpreter)
-   execute_process(
-     COMMAND Python::Interpreter -c
-diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
-index c6647eb457c..accebfd3457 100644
---- a/cmake/public/utils.cmake
-+++ b/cmake/public/utils.cmake
-@@ -306,6 +306,133 @@ macro(torch_hip_get_arch_list store_var)
-   string(REPLACE " " ";" ${store_var} "${_TMP}")
- endmacro()
- 
-+# torch_cuda_get_nvcc_gencode_flag is part of find_package(CUDA), but not find_package(CUDAToolkit);
-+# vendor it from https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA/select_compute_arch.cmake
-+# but disable CUDA_DETECT_INSTALLED_GPUS
-+################################################################################################
-+# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
-+# Usage:
-+#   SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs])
-+function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
-+  set(CUDA_ARCH_LIST "${ARGN}")
-+
-+  if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
-+    set(CUDA_ARCH_LIST "Auto")
-+  endif()
-+
-+  set(cuda_arch_bin)
-+  set(cuda_arch_ptx)
-+
-+  if("${CUDA_ARCH_LIST}" STREQUAL "All")
-+    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
-+  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
-+    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
-+  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
-+    # disabled, replaced by common architectures
-+    # CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
-+    # message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
-+    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
-+  endif()
-+
-+  # Now process the list and look for names
-+  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
-+  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
-+  foreach(arch_name ${CUDA_ARCH_LIST})
-+    set(arch_bin)
-+    set(arch_ptx)
-+    set(add_ptx FALSE)
-+    # Check to see if we are compiling PTX
-+    if(arch_name MATCHES "(.*)\\+PTX$")
-+      set(add_ptx TRUE)
-+      set(arch_name ${CMAKE_MATCH_1})
-+    endif()
-+    if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
-+      set(arch_bin ${CMAKE_MATCH_1})
-+      set(arch_ptx ${arch_bin})
-+    else()
-+      # Look for it in our list of known architectures
-+      if(${arch_name} STREQUAL "Fermi")
-+        set(arch_bin 2.0 "2.1(2.0)")
-+      elseif(${arch_name} STREQUAL "Kepler+Tegra")
-+        set(arch_bin 3.2)
-+      elseif(${arch_name} STREQUAL "Kepler+Tesla")
-+        set(arch_bin 3.7)
-+      elseif(${arch_name} STREQUAL "Kepler")
-+        set(arch_bin 3.0 3.5)
-+        set(arch_ptx 3.5)
-+      elseif(${arch_name} STREQUAL "Maxwell+Tegra")
-+        set(arch_bin 5.3)
-+      elseif(${arch_name} STREQUAL "Maxwell")
-+        set(arch_bin 5.0 5.2)
-+        set(arch_ptx 5.2)
-+      elseif(${arch_name} STREQUAL "Pascal")
-+        set(arch_bin 6.0 6.1)
-+        set(arch_ptx 6.1)
-+      elseif(${arch_name} STREQUAL "Volta")
-+        set(arch_bin 7.0 7.0)
-+        set(arch_ptx 7.0)
-+      elseif(${arch_name} STREQUAL "Turing")
-+        set(arch_bin 7.5)
-+        set(arch_ptx 7.5)
-+      elseif(${arch_name} STREQUAL "Ampere")
-+        set(arch_bin 8.0)
-+        set(arch_ptx 8.0)
-+      else()
-+        message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS")
-+      endif()
-+    endif()
-+    if(NOT arch_bin)
-+      message(SEND_ERROR "arch_bin wasn't set for some reason")
-+    endif()
-+    list(APPEND cuda_arch_bin ${arch_bin})
-+    if(add_ptx)
-+      if (NOT arch_ptx)
-+        set(arch_ptx ${arch_bin})
-+      endif()
-+      list(APPEND cuda_arch_ptx ${arch_ptx})
-+    endif()
-+  endforeach()
-+
-+  # remove dots and convert to lists
-+  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
-+  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
-+  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
-+  string(REGEX MATCHALL "[0-9]+"   cuda_arch_ptx "${cuda_arch_ptx}")
-+
-+  if(cuda_arch_bin)
-+    list(REMOVE_DUPLICATES cuda_arch_bin)
-+  endif()
-+  if(cuda_arch_ptx)
-+    list(REMOVE_DUPLICATES cuda_arch_ptx)
-+  endif()
-+
-+  set(nvcc_flags "")
-+  set(nvcc_archs_readable "")
-+
-+  # Tell NVCC to add binaries for the specified GPUs
-+  foreach(arch ${cuda_arch_bin})
-+    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
-+      # User explicitly specified ARCH for the concrete CODE
-+      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
-+      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
-+    else()
-+      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
-+      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
-+      list(APPEND nvcc_archs_readable sm_${arch})
-+    endif()
-+  endforeach()
-+
-+  # Tell NVCC to add PTX intermediate code for the specified architectures
-+  foreach(arch ${cuda_arch_ptx})
-+    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
-+    list(APPEND nvcc_archs_readable compute_${arch})
-+  endforeach()
-+
-+  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
-+  set(${out_variable}          ${nvcc_flags}          PARENT_SCOPE)
-+  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
-+endfunction()
-+
- ##############################################################################
- # Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
- # Usage:
-diff --git a/setup.py b/setup.py
-index b0e01e0d1ee..dc21f91d69e 100644
---- a/setup.py
-+++ b/setup.py
-@@ -627,7 +627,7 @@ class build_ext(setuptools.command.build_ext.build_ext):
-         else:
-             report("-- Not using cuDNN")
-         if cmake_cache_vars["USE_CUDA"]:
--            report("-- Detected CUDA at " + cmake_cache_vars["CUDA_TOOLKIT_ROOT_DIR"])
-+            report(f"-- Detected CUDA at {cmake_cache_vars['CMAKE_CUDA_COMPILER_TOOLKIT_ROOT']}")
-         else:
-             report("-- Not using CUDA")
-         if cmake_cache_vars["USE_XPU"]:
diff --git a/recipe/patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch b/recipe/patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
deleted file mode 100644
index 665cc74e8..000000000
--- a/recipe/patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From a9879bdd5ea793c5301a4b86f163a07e1f28f321 Mon Sep 17 00:00:00 2001
-From: "H. Vetinari" <h.vetinari@gmx.com>
-Date: Tue, 28 Jan 2025 13:32:28 +1100
-Subject: [PATCH] remove `DESTINATION lib` from CMake install directives
-
-Suggested-By: Silvio Traversaro <silvio@traversaro.it>
----
- CMakeLists.txt | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/third_party/fbgemm/CMakeLists.txt b/third_party/fbgemm/CMakeLists.txt
-index 134523e7..86fb8fad 100644
---- a/third_party/fbgemm/CMakeLists.txt
-+++ b/third_party/fbgemm/CMakeLists.txt
-@@ -370,8 +370,8 @@ if(MSVC)
-       FILES $<TARGET_PDB_FILE:fbgemm> $<TARGET_PDB_FILE:asmjit>
-       DESTINATION ${CMAKE_INSTALL_LIBDIR} OPTIONAL)
-   endif()
--  install(TARGETS fbgemm DESTINATION ${CMAKE_INSTALL_LIBDIR})
--  install(TARGETS asmjit DESTINATION ${CMAKE_INSTALL_LIBDIR})
-+  install(TARGETS fbgemm)
-+  install(TARGETS asmjit)
- endif()
- 
- #Make project importable from the build directory
diff --git a/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch b/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
deleted file mode 100644
index fe411d716..000000000
--- a/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-From 9a1de62dd1b3d816d6fb87c2041f4005ab5c683d Mon Sep 17 00:00:00 2001
-From: "H. Vetinari" <h.vetinari@gmx.com>
-Date: Sun, 2 Feb 2025 08:54:01 +1100
-Subject: [PATCH] switch away from find_package(CUDA)
-
----
- tensorpipe/CMakeLists.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/third_party/tensorpipe/tensorpipe/CMakeLists.txt b/third_party/tensorpipe/tensorpipe/CMakeLists.txt
-index efcffc2..1c3b2ca 100644
---- a/third_party/tensorpipe/tensorpipe/CMakeLists.txt
-+++ b/third_party/tensorpipe/tensorpipe/CMakeLists.txt
-@@ -234,7 +234,7 @@ if(TP_USE_CUDA)
-   # TP_INCLUDE_DIRS is list of include path to be used
-   set(TP_CUDA_INCLUDE_DIRS)
- 
--  find_package(CUDA REQUIRED)
-+  find_package(CUDAToolkit REQUIRED)
-   list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})
-   list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
- 
diff --git a/recipe/third_party/CMake/Copyright.txt b/recipe/third_party/CMake/Copyright.txt
deleted file mode 100644
index f32a818aa..000000000
--- a/recipe/third_party/CMake/Copyright.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-CMake - Cross Platform Makefile Generator
-Copyright 2000-2025 Kitware, Inc. and Contributors
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of Kitware, Inc. nor the names of Contributors
-  may be used to endorse or promote products derived from this
-  software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-------------------------------------------------------------------------------
-
-The following individuals and institutions are among the Contributors:
-
-* Aaron C. Meadows <cmake@shadowguarddev.com>
-* Adriaan de Groot <groot@kde.org>
-* Aleksey Avdeev <solo@altlinux.ru>
-* Alexander Neundorf <neundorf@kde.org>
-* Alexander Smorkalov <alexander.smorkalov@itseez.com>
-* Alexey Sokolov <sokolov@google.com>
-* Alex Merry <alex.merry@kde.org>
-* Alex Turbov <i.zaufi@gmail.com>
-* Andreas Pakulat <apaku@gmx.de>
-* Andreas Schneider <asn@cryptomilk.org>
-* André Rigland Brodtkorb <Andre.Brodtkorb@ifi.uio.no>
-* Axel Huebl, Helmholtz-Zentrum Dresden - Rossendorf
-* Benjamin Eikel
-* Bjoern Ricks <bjoern.ricks@gmail.com>
-* Brad Hards <bradh@kde.org>
-* Christopher Harvey
-* Christoph Grüninger <foss@grueninger.de>
-* Clement Creusot <creusot@cs.york.ac.uk>
-* Daniel Blezek <blezek@gmail.com>
-* Daniel Pfeifer <daniel@pfeifer-mail.de>
-* Dawid Wróbel <me@dawidwrobel.com>
-* Enrico Scholz <enrico.scholz@informatik.tu-chemnitz.de>
-* Eran Ifrah <eran.ifrah@gmail.com>
-* Esben Mose Hansen, Ange Optimization ApS
-* Geoffrey Viola <geoffrey.viola@asirobots.com>
-* Google Inc
-* Gregor Jasny
-* Helio Chissini de Castro <helio@kde.org>
-* Ilya Lavrenov <ilya.lavrenov@itseez.com>
-* Insight Software Consortium <insightsoftwareconsortium.org>
-* Intel Corporation <www.intel.com>
-* Jan Woetzel
-* Jordan Williams <jordan@jwillikers.com>
-* Julien Schueller
-* Kelly Thompson <kgt@lanl.gov>
-* Konstantin Podsvirov <konstantin@podsvirov.pro>
-* Laurent Montel <montel@kde.org>
-* Mario Bensi <mbensi@ipsquad.net>
-* Martin Gräßlin <mgraesslin@kde.org>
-* Mathieu Malaterre <mathieu.malaterre@gmail.com>
-* Matthaeus G. Chajdas
-* Matthias Kretz <kretz@kde.org>
-* Matthias Maennich <matthias@maennich.net>
-* Michael Hirsch, Ph.D. <www.scivision.co>
-* Michael Stürmer
-* Miguel A. Figueroa-Villanueva
-* Mike Durso <rbprogrammer@gmail.com>
-* Mike Jackson
-* Mike McQuaid <mike@mikemcquaid.com>
-* Nicolas Bock <nicolasbock@gmail.com>
-* Nicolas Despres <nicolas.despres@gmail.com>
-* Nikita Krupen'ko <krnekit@gmail.com>
-* NVIDIA Corporation <www.nvidia.com>
-* OpenGamma Ltd. <opengamma.com>
-* Patrick Stotko <stotko@cs.uni-bonn.de>
-* Per Øyvind Karlsen <peroyvind@mandriva.org>
-* Peter Collingbourne <peter@pcc.me.uk>
-* Petr Gotthard <gotthard@honeywell.com>
-* Philip Lowman <philip@yhbt.com>
-* Philippe Proulx <pproulx@efficios.com>
-* Raffi Enficiaud, Max Planck Society
-* Raumfeld <raumfeld.com>
-* Roger Leigh <rleigh@codelibre.net>
-* Rolf Eike Beer <eike@sf-mail.de>
-* Roman Donchenko <roman.donchenko@itseez.com>
-* Roman Kharitonov <roman.kharitonov@itseez.com>
-* Ruslan Baratov
-* Sebastian Holtermann <sebholt@xwmw.org>
-* Stephen Kelly <steveire@gmail.com>
-* Sylvain Joubert <joubert.sy@gmail.com>
-* The Qt Company Ltd.
-* Thomas Sondergaard <ts@medical-insight.com>
-* Tobias Hunger <tobias.hunger@qt.io>
-* Todd Gamblin <tgamblin@llnl.gov>
-* Tristan Carel
-* University of Dundee
-* Vadim Zhukov
-* Will Dicharry <wdicharry@stellarscience.com>
-
-See version control history for details of individual contributions.
-
-The above copyright and license notice applies to distributions of
-CMake in source and binary form.  Third-party software packages supplied
-with CMake under compatible licenses provide their own copyright notices
-documented in corresponding subdirectories or source files.
-
-------------------------------------------------------------------------------
-
-CMake was initially developed by Kitware with the following sponsorship:
-
- * National Library of Medicine at the National Institutes of Health
-   as part of the Insight Segmentation and Registration Toolkit (ITK).
-
- * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel
-   Visualization Initiative.
-
- * National Alliance for Medical Image Computing (NAMIC) is funded by the
-   National Institutes of Health through the NIH Roadmap for Medical Research,
-   Grant U54 EB005149.
-
- * Kitware, Inc.