From b6808c2fff00fabd4b4d9c1a878491a7933a0660 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 30 Jan 2025 13:17:32 +1100
Subject: [PATCH 01/25] Revert "skip test failures with CUDA due to non-unique
 temporaries"

This reverts commit 53ab2c8cfc99ea8fc0bdec628a6910b663ba0f90.
---
 recipe/meta.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index da7830f5..138cfba3 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -478,9 +478,6 @@ outputs:
         # may crash spuriously
         {% set skips = skips ~ " or (TestAutograd and test_profiler_seq_nr)" %}
         {% set skips = skips ~ " or (TestAutograd and test_profiler_propagation)" %}
-        # tests that fail due to resource clean-up issues (non-unique temporary libraries), see
-        # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/318#issuecomment-2620080859
-        {% set skips = skips ~ " or test_mutable_custom_op_fixed_layout" %}             # [cuda_compiler_version != "None"]
         # trivial accuracy problems
         {% set skips = skips ~ " or test_BCELoss_weights_no_reduce_cuda" %}             # [unix and cuda_compiler_version != "None"]
         {% set skips = skips ~ " or test_ctc_loss_cudnn_tensor_cuda " %}                # [unix and cuda_compiler_version != "None"]

From f52e86ce45fa506e6a60f81300a7e6856af4a935 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 30 Jan 2025 13:25:56 +1100
Subject: [PATCH 02/25] make temporary library names unique in failing tests

---
 recipe/meta.yaml                              |  1 +
 ...-of-python-3-and-error-without-numpy.patch |  2 +-
 recipe/patches/0002-Help-find-numpy.patch     |  2 +-
 ...03-Add-USE_SYSTEM_NVTX-option-138287.patch |  2 +-
 .../patches/0004-Update-sympy-version.patch   |  2 +-
 .../0005-Fix-duplicate-linker-script.patch    |  2 +-
 ...kle-error-in-serialization.py-136034.patch |  2 +-
 ...verwrite-ld-with-environment-variabl.patch |  2 +-
 ...-Allow-overriding-CUDA-related-paths.patch |  2 +-
 ...st-test_linalg.py-for-NumPy-2-136800.patch |  2 +-
 ...est-failures-in-test_torch.py-137740.patch |  2 +-
 ...AS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch |  2 +-
 recipe/patches/0012-fix-issue-142484.patch    |  2 +-
 recipe/patches/0013-Fix-FindOpenBLAS.patch    |  2 +-
 ...Enable-Python-3.13-on-windows-138095.patch |  2 +-
 ...tils.cpp_extension.include_paths-use.patch |  2 +-
 ...oint-include-paths-to-PREFIX-include.patch |  2 +-
 ...nda-prefix-to-inductor-include-paths.patch |  2 +-
 ...E_DIR-relative-to-TORCH_INSTALL_PREF.patch |  2 +-
 ...ON-lib-from-CMake-install-TARGETS-di.patch |  2 +-
 ...e-in-test_mutable_custom_op_fixed_la.patch | 35 +++++++++++++++++++
 21 files changed, 55 insertions(+), 19 deletions(-)
 create mode 100644 recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 138cfba3..4c8c2551 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -70,6 +70,7 @@ source:
     - patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
     - patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
     - patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch               # [win]
+    - patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
     - patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch    # [win]
 
 build:
diff --git a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
index 7c03260a..b5519b81 100644
--- a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
+++ b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
@@ -1,7 +1,7 @@
 From f3a0f9aab6dce56eea590b946f60256014b61bf7 Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Sun, 1 Sep 2024 17:35:40 -0400
-Subject: [PATCH 01/19] Force usage of python 3 and error without numpy
+Subject: [PATCH 01/20] Force usage of python 3 and error without numpy
 
 ---
  cmake/Dependencies.cmake | 6 +++---
diff --git a/recipe/patches/0002-Help-find-numpy.patch b/recipe/patches/0002-Help-find-numpy.patch
index 694d0475..833af9f1 100644
--- a/recipe/patches/0002-Help-find-numpy.patch
+++ b/recipe/patches/0002-Help-find-numpy.patch
@@ -1,7 +1,7 @@
 From 21c30036b5b86f403c0cf4426165d9a6a50edb1a Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Tue, 1 Oct 2024 00:28:40 -0400
-Subject: [PATCH 02/19] Help find numpy
+Subject: [PATCH 02/20] Help find numpy
 
 ---
  tools/setup_helpers/cmake.py | 6 ++++++
diff --git a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
index 92a497db..a4c44e01 100644
--- a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
+++ b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
@@ -1,7 +1,7 @@
 From d1826af525db41eda5020a1404f5d5521d67a5dc Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Sat, 19 Oct 2024 04:26:01 +0000
-Subject: [PATCH 03/19] Add USE_SYSTEM_NVTX option (#138287)
+Subject: [PATCH 03/20] Add USE_SYSTEM_NVTX option (#138287)
 
 ## Summary
 
diff --git a/recipe/patches/0004-Update-sympy-version.patch b/recipe/patches/0004-Update-sympy-version.patch
index 92340dba..81a66b3f 100644
--- a/recipe/patches/0004-Update-sympy-version.patch
+++ b/recipe/patches/0004-Update-sympy-version.patch
@@ -1,7 +1,7 @@
 From e3219c5fe8834753b0cf9e92be4d1ef1e874f370 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Thu, 17 Oct 2024 15:04:05 -0700
-Subject: [PATCH 04/19] Update sympy version
+Subject: [PATCH 04/20] Update sympy version
 
 ---
  setup.py | 2 +-
diff --git a/recipe/patches/0005-Fix-duplicate-linker-script.patch b/recipe/patches/0005-Fix-duplicate-linker-script.patch
index 1d2e52e7..cb09dcdf 100644
--- a/recipe/patches/0005-Fix-duplicate-linker-script.patch
+++ b/recipe/patches/0005-Fix-duplicate-linker-script.patch
@@ -1,7 +1,7 @@
 From 08a1f44fbc81324aa98d720dfb7b87a261923ac2 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Sun, 3 Nov 2024 01:12:36 -0700
-Subject: [PATCH 05/19] Fix duplicate linker script
+Subject: [PATCH 05/20] Fix duplicate linker script
 
 ---
  setup.py | 4 +++-
diff --git a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
index 5d0c8ccf..326e6285 100644
--- a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
+++ b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
@@ -1,7 +1,7 @@
 From 15df314a41c69a31c0443254d5552aa1b39d708d Mon Sep 17 00:00:00 2001
 From: William Wen <williamwen@meta.com>
 Date: Fri, 13 Sep 2024 13:02:33 -0700
-Subject: [PATCH 06/19] fix 3.13 pickle error in serialization.py (#136034)
+Subject: [PATCH 06/20] fix 3.13 pickle error in serialization.py (#136034)
 
 Error encountered when adding dynamo 3.13 support.
 Pull Request resolved: https://github.com/pytorch/pytorch/pull/136034
diff --git a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
index e6dcc5af..ad215aa9 100644
--- a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
+++ b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
@@ -1,7 +1,7 @@
 From 655f694854c3eafdd631235b60bc6c1b279218ed Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Thu, 3 Oct 2024 22:49:56 -0400
-Subject: [PATCH 07/19] Allow users to overwrite ld with environment variables
+Subject: [PATCH 07/20] Allow users to overwrite ld with environment variables
 
 This should help in the case of cross compilation.
 
diff --git a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
index 8c366b3d..fbfe0560 100644
--- a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
+++ b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
@@ -1,7 +1,7 @@
 From f03bf82d9da9cccb2cf4d4833c1a6349622dc37d Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
 Date: Wed, 27 Nov 2024 13:47:23 +0100
-Subject: [PATCH 08/19] Allow overriding CUDA-related paths
+Subject: [PATCH 08/20] Allow overriding CUDA-related paths
 
 ---
  cmake/Modules/FindCUDAToolkit.cmake | 2 +-
diff --git a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
index 4d11b1b6..580fe42a 100644
--- a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
+++ b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
@@ -1,7 +1,7 @@
 From 4b1faf6ba142953ce2730766db44f8d98d161ef0 Mon Sep 17 00:00:00 2001
 From: Haifeng Jin <haifeng-jin@users.noreply.github.com>
 Date: Tue, 1 Oct 2024 07:53:24 +0000
-Subject: [PATCH 09/19] Fix test/test_linalg.py for NumPy 2 (#136800)
+Subject: [PATCH 09/20] Fix test/test_linalg.py for NumPy 2 (#136800)
 
 Related to  #107302.
 
diff --git a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
index 9a7572a3..6495b150 100644
--- a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
+++ b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
@@ -1,7 +1,7 @@
 From 032b9be9ca7f9ae174e75554cecc82600ea3ef54 Mon Sep 17 00:00:00 2001
 From: Haifeng Jin <haifeng-jin@users.noreply.github.com>
 Date: Sat, 12 Oct 2024 02:40:17 +0000
-Subject: [PATCH 10/19] Fixes NumPy 2 test failures in test_torch.py (#137740)
+Subject: [PATCH 10/20] Fixes NumPy 2 test failures in test_torch.py (#137740)
 
 Related to #107302
 
diff --git a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
index 7202c4af..193ce159 100644
--- a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
+++ b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
@@ -1,7 +1,7 @@
 From 56f1528fa072023fb2724d5abf8790f2f6cc3aaa Mon Sep 17 00:00:00 2001
 From: Isuru Fernando <ifernando@quansight.com>
 Date: Wed, 18 Dec 2024 03:59:00 +0000
-Subject: [PATCH 11/19] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
+Subject: [PATCH 11/20] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
 
 There are two calling conventions for *dotu functions
 
diff --git a/recipe/patches/0012-fix-issue-142484.patch b/recipe/patches/0012-fix-issue-142484.patch
index 675b7545..00f1e3d2 100644
--- a/recipe/patches/0012-fix-issue-142484.patch
+++ b/recipe/patches/0012-fix-issue-142484.patch
@@ -1,7 +1,7 @@
 From beba58d724cc1bd7ca73660b0a5ad9e61ae0c562 Mon Sep 17 00:00:00 2001
 From: "Zheng, Zhaoqiong" <zhaoqiong.zheng@intel.com>
 Date: Fri, 27 Dec 2024 13:49:36 +0800
-Subject: [PATCH 12/19] fix issue 142484
+Subject: [PATCH 12/20] fix issue 142484
 
 From https://github.com/pytorch/pytorch/pull/143894
 ---
diff --git a/recipe/patches/0013-Fix-FindOpenBLAS.patch b/recipe/patches/0013-Fix-FindOpenBLAS.patch
index ef18af04..f539d0a6 100644
--- a/recipe/patches/0013-Fix-FindOpenBLAS.patch
+++ b/recipe/patches/0013-Fix-FindOpenBLAS.patch
@@ -1,7 +1,7 @@
 From 816a248a4425a97350959e412666e6db9012a52e Mon Sep 17 00:00:00 2001
 From: Bas Zalmstra <bas@prefix.dev>
 Date: Thu, 16 May 2024 10:46:49 +0200
-Subject: [PATCH 13/19] Fix FindOpenBLAS
+Subject: [PATCH 13/20] Fix FindOpenBLAS
 
 ---
  cmake/Modules/FindOpenBLAS.cmake | 15 +++++++++------
diff --git a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
index 3a258d8f..7a2df88f 100644
--- a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
+++ b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
@@ -1,7 +1,7 @@
 From db896f927403f55a18f931b18a6469cb4e37d322 Mon Sep 17 00:00:00 2001
 From: atalman <atalman@fb.com>
 Date: Tue, 12 Nov 2024 12:28:10 +0000
-Subject: [PATCH 14/19] CD Enable Python 3.13 on windows (#138095)
+Subject: [PATCH 14/20] CD Enable Python 3.13 on windows (#138095)
 
 Adding CD windows. Part of: https://github.com/pytorch/pytorch/issues/130249
 Builder PR landed with smoke test: https://github.com/pytorch/builder/pull/2035
diff --git a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
index 95f92692..3736ca78 100644
--- a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
+++ b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
@@ -1,7 +1,7 @@
 From 33790dfbf966e7d8ea4ff6798d2ff92474d84079 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 23 Jan 2025 22:46:58 +1100
-Subject: [PATCH 15/19] simplify torch.utils.cpp_extension.include_paths; use
+Subject: [PATCH 15/20] simplify torch.utils.cpp_extension.include_paths; use
  it in cpp_builder
 
 The /TH headers have not existed since pytorch 1.11
diff --git a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
index 84b211d1..764e24af 100644
--- a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
+++ b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
@@ -1,7 +1,7 @@
 From 799f6fa59dac93dabbbcf72d46f4e1334e3d65d9 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 23 Jan 2025 22:58:14 +1100
-Subject: [PATCH 16/19] point include paths to $PREFIX/include
+Subject: [PATCH 16/20] point include paths to $PREFIX/include
 
 ---
  torch/utils/cpp_extension.py | 9 +++++++++
diff --git a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
index fc36f27f..e2111c54 100644
--- a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
+++ b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
@@ -1,7 +1,7 @@
 From 9f73a02bacf9680833ac64657fde6762d33ab200 Mon Sep 17 00:00:00 2001
 From: Daniel Petry <dpetry@anaconda.com>
 Date: Tue, 21 Jan 2025 17:45:23 -0600
-Subject: [PATCH 17/19] Add conda prefix to inductor include paths
+Subject: [PATCH 17/20] Add conda prefix to inductor include paths
 
 Currently inductor doesn't look in conda's includes and libs. This results in
 errors when it tries to compile, if system versions are being used of
diff --git a/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch b/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
index cc7e33dd..028d79be 100644
--- a/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
+++ b/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
@@ -1,7 +1,7 @@
 From b0cfa0f728e96a3a9d6f7434e2c02d74d6daa9a9 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Tue, 28 Jan 2025 14:15:34 +1100
-Subject: [PATCH 18/19] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX
+Subject: [PATCH 18/20] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX
 
 we cannot set CMAKE_INSTALL_PREFIX without the pytorch build complaining, but we can
 use TORCH_INSTALL_PREFIX, which is set correctly relative to our CMake files already:
diff --git a/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch b/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
index 4357adb4..7aa41192 100644
--- a/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
+++ b/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
@@ -1,7 +1,7 @@
 From f7db4cbfb0af59027ed8bdcd0387dba6fbcb1192 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Tue, 28 Jan 2025 10:58:29 +1100
-Subject: [PATCH 19/19] remove `DESTINATION lib` from CMake `install(TARGETS`
+Subject: [PATCH 19/20] remove `DESTINATION lib` from CMake `install(TARGETS`
  directives
 
 Suggested-By: Silvio Traversaro <silvio@traversaro.it>
diff --git a/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch b/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
new file mode 100644
index 00000000..523d92cc
--- /dev/null
+++ b/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
@@ -0,0 +1,35 @@
+From 79ee5dcac30f2eba891af961f7649d15dfc6ce63 Mon Sep 17 00:00:00 2001
+From: "H. Vetinari" <h.vetinari@gmx.com>
+Date: Thu, 30 Jan 2025 13:23:14 +1100
+Subject: [PATCH 20/20] make library name in
+ `test_mutable_custom_op_fixed_layout{,2}` unique
+
+Suggested-By: Daniel Petry <dpetry@anaconda.com>
+---
+ test/inductor/test_torchinductor.py | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py
+index 610f5d27332..28f85e228d2 100644
+--- a/test/inductor/test_torchinductor.py
++++ b/test/inductor/test_torchinductor.py
+@@ -10628,7 +10628,8 @@ class CommonTemplate:
+     @requires_gpu()
+     @config.patch(implicit_fallbacks=True)
+     def test_mutable_custom_op_fixed_layout2(self):
+-        with torch.library._scoped_library("mylib", "DEF") as lib:
++        unique_lib_name = f"mylib_{id(self)}"  # Make unique name using test instance id
++        with torch.library._scoped_library(unique_lib_name, "DEF") as lib:
+             mod = nn.Conv2d(3, 128, 1, stride=1, bias=False).to(device=GPU_TYPE)
+             inp = torch.rand(2, 3, 128, 128, device=GPU_TYPE)
+             expected_stride = mod(inp).clone().stride()
+@@ -10681,7 +10682,8 @@ class CommonTemplate:
+ 
+     @config.patch(implicit_fallbacks=True)
+     def test_mutable_custom_op_fixed_layout(self):
+-        with torch.library._scoped_library("mylib", "DEF") as lib:
++        unique_lib_name = f"mylib_{id(self)}"  # Make unique name using test instance id
++        with torch.library._scoped_library(unique_lib_name, "DEF") as lib:
+             lib.define(
+                 "copy_(Tensor(a!) dst, Tensor src) -> ()",
+                 tags=torch.Tag.needs_fixed_stride_order,

From c2b6ce4513d6e74d352a0aa2cfe7840ed50f2e82 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 30 Jan 2025 13:32:50 +1100
Subject: [PATCH 03/25] collect USE_* variables in `bld.bat`

---
 recipe/bld.bat | 48 +++++++++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/recipe/bld.bat b/recipe/bld.bat
index 5f6f57c6..c9e38bd8 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -27,10 +27,6 @@ if "%blas_impl%" == "generic" (
     SET BLAS=MKL
 )
 
-@REM TODO(baszalmstra): Figure out if we need these flags
-SET "USE_NUMA=0"
-SET "USE_ITT=0"
-
 if "%PKG_NAME%" == "pytorch" (
   set "PIP_ACTION=install"
   @REM We build libtorch for a specific python version.
@@ -64,8 +60,27 @@ if "%PKG_NAME%" == "pytorch" (
   set "PIP_ACTION=wheel"
 )
 
+set "BUILD_CUSTOM_PROTOBUF=OFF"
+set "USE_LITE_PROTO=ON"
+
+@REM TODO(baszalmstra): Figure out if we need these flags
+SET "USE_ITT=0"
+SET "USE_NUMA=0"
+
+@REM TODO(baszalmstra): There are linker errors because of mixing Intel OpenMP (iomp) and Microsoft OpenMP (vcomp)
+set "USE_OPENMP=OFF"
+
+@REM Use our Pybind11, Eigen, sleef
+set USE_SYSTEM_EIGEN_INSTALL=1
+set USE_SYSTEM_PYBIND11=1
+set USE_SYSTEM_SLEEF=1
+
 if not "%cuda_compiler_version%" == "None" (
     set USE_CUDA=1
+    set USE_STATIC_CUDNN=0
+    @REM NCCL is not available on windows
+    set USE_NCCL=0
+    set USE_STATIC_NCCL=0
 
     @REM set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
     @REM set CUDA_BIN_PATH=%CUDA_PATH%\bin
@@ -74,39 +89,29 @@ if not "%cuda_compiler_version%" == "None" (
 
     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
 
-    set USE_STATIC_CUDNN=0
     set MAGMA_HOME=%PREFIX%
 
-    @REM NCCL is not available on windows
-    set USE_NCCL=0
-    set USE_STATIC_NCCL=0
-
     set MAGMA_HOME=%LIBRARY_PREFIX%
 
     set "PATH=%CUDA_BIN_PATH%;%PATH%"
 
     set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include
-
 ) else (
     set USE_CUDA=0
+    @REM MKLDNN is an Apache-2.0 licensed library for DNNs and is used
+    @REM for CPU builds. Not to be confused with MKL.
+    set "USE_MKLDNN=1"
+
     @REM On windows, env vars are case-insensitive and setup.py
     @REM passes all env vars starting with CUDA_*, CMAKE_* to
     @REM to cmake
     set "cuda_compiler_version="
     set "cuda_compiler="
     set "CUDA_VERSION="
-
-    @REM MKLDNN is an Apache-2.0 licensed library for DNNs and is used
-    @REM for CPU builds. Not to be confused with MKL.
-    set "USE_MKLDNN=1"
 )
 
 set DISTUTILS_USE_SDK=1
 
-@REM Use our Pybind11, Eigen
-set USE_SYSTEM_PYBIND11=1
-set USE_SYSTEM_EIGEN_INSTALL=1
-
 set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include
 set LIB=%LIBRARY_PREFIX%\lib;%LIB%
 
@@ -126,17 +131,10 @@ set "INSTALL_TEST=0"
 set "BUILD_TEST=0"
 
 set "libuv_ROOT=%LIBRARY_PREFIX%"
-set "USE_SYSTEM_SLEEF=ON"
 
 @REM uncomment to debug cmake build
 @REM set "CMAKE_VERBOSE_MAKEFILE=1"
 
-set "BUILD_CUSTOM_PROTOBUF=OFF"
-set "USE_LITE_PROTO=ON"
-
-@REM TODO(baszalmstra): There are linker errors because of mixing Intel OpenMP (iomp) and Microsoft OpenMP (vcomp)
-set "USE_OPENMP=OFF"
-
 @REM The activation script for cuda-nvcc doesnt add the CUDA_CFLAGS on windows.
 @REM Therefore we do this manually here. See:
 @REM https://github.com/conda-forge/cuda-nvcc-feedstock/issues/47

From 8622491d364cfa4be8567a8d7be301dccbfd0ade Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 30 Jan 2025 13:33:48 +1100
Subject: [PATCH 04/25] clean up CUDA option handling in `bld.bat`

---
 recipe/bld.bat | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/recipe/bld.bat b/recipe/bld.bat
index c9e38bd8..87fe94e3 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -85,16 +85,11 @@ if not "%cuda_compiler_version%" == "None" (
     @REM set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
     @REM set CUDA_BIN_PATH=%CUDA_PATH%\bin
 
-    set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX
-
-    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
-
-    set MAGMA_HOME=%PREFIX%
+    set "TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
+    set "TORCH_NVCC_FLAGS=-Xfatbin -compress-all"
 
     set MAGMA_HOME=%LIBRARY_PREFIX%
-
     set "PATH=%CUDA_BIN_PATH%;%PATH%"
-
     set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include
 ) else (
     set USE_CUDA=0

From 093816a0c7319268b8932389d78416450b9ff305 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 30 Jan 2025 13:39:53 +1100
Subject: [PATCH 05/25] first attempt at patching `find_package(CUDA)`

---
 recipe/meta.yaml                              |   1 +
 ...-of-python-3-and-error-without-numpy.patch |   2 +-
 recipe/patches/0002-Help-find-numpy.patch     |   2 +-
 ...03-Add-USE_SYSTEM_NVTX-option-138287.patch |   2 +-
 .../patches/0004-Update-sympy-version.patch   |   2 +-
 .../0005-Fix-duplicate-linker-script.patch    |   2 +-
 ...kle-error-in-serialization.py-136034.patch |   2 +-
 ...verwrite-ld-with-environment-variabl.patch |   2 +-
 ...-Allow-overriding-CUDA-related-paths.patch |   2 +-
 ...st-test_linalg.py-for-NumPy-2-136800.patch |   2 +-
 ...est-failures-in-test_torch.py-137740.patch |   2 +-
 ...AS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch |   2 +-
 recipe/patches/0012-fix-issue-142484.patch    |   2 +-
 recipe/patches/0013-Fix-FindOpenBLAS.patch    |   2 +-
 ...Enable-Python-3.13-on-windows-138095.patch |   2 +-
 ...tils.cpp_extension.include_paths-use.patch |   2 +-
 ...oint-include-paths-to-PREFIX-include.patch |   2 +-
 ...nda-prefix-to-inductor-include-paths.patch |   2 +-
 ...E_DIR-relative-to-TORCH_INSTALL_PREF.patch |   2 +-
 ...ON-lib-from-CMake-install-TARGETS-di.patch |   2 +-
 ...e-in-test_mutable_custom_op_fixed_la.patch |   2 +-
 ...-find_package-CUDA-in-caffe2-CMake-m.patch | 216 ++++++++++++++++++
 22 files changed, 237 insertions(+), 20 deletions(-)
 create mode 100644 recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 4c8c2551..b32271a6 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -71,6 +71,7 @@ source:
     - patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
     - patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch               # [win]
     - patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
+    - patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
     - patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch    # [win]
 
 build:
diff --git a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
index b5519b81..fda50bcc 100644
--- a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
+++ b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
@@ -1,7 +1,7 @@
 From f3a0f9aab6dce56eea590b946f60256014b61bf7 Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Sun, 1 Sep 2024 17:35:40 -0400
-Subject: [PATCH 01/20] Force usage of python 3 and error without numpy
+Subject: [PATCH 01/21] Force usage of python 3 and error without numpy
 
 ---
  cmake/Dependencies.cmake | 6 +++---
diff --git a/recipe/patches/0002-Help-find-numpy.patch b/recipe/patches/0002-Help-find-numpy.patch
index 833af9f1..d660deda 100644
--- a/recipe/patches/0002-Help-find-numpy.patch
+++ b/recipe/patches/0002-Help-find-numpy.patch
@@ -1,7 +1,7 @@
 From 21c30036b5b86f403c0cf4426165d9a6a50edb1a Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Tue, 1 Oct 2024 00:28:40 -0400
-Subject: [PATCH 02/20] Help find numpy
+Subject: [PATCH 02/21] Help find numpy
 
 ---
  tools/setup_helpers/cmake.py | 6 ++++++
diff --git a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
index a4c44e01..d44513d4 100644
--- a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
+++ b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
@@ -1,7 +1,7 @@
 From d1826af525db41eda5020a1404f5d5521d67a5dc Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Sat, 19 Oct 2024 04:26:01 +0000
-Subject: [PATCH 03/20] Add USE_SYSTEM_NVTX option (#138287)
+Subject: [PATCH 03/21] Add USE_SYSTEM_NVTX option (#138287)
 
 ## Summary
 
diff --git a/recipe/patches/0004-Update-sympy-version.patch b/recipe/patches/0004-Update-sympy-version.patch
index 81a66b3f..a73a7399 100644
--- a/recipe/patches/0004-Update-sympy-version.patch
+++ b/recipe/patches/0004-Update-sympy-version.patch
@@ -1,7 +1,7 @@
 From e3219c5fe8834753b0cf9e92be4d1ef1e874f370 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Thu, 17 Oct 2024 15:04:05 -0700
-Subject: [PATCH 04/20] Update sympy version
+Subject: [PATCH 04/21] Update sympy version
 
 ---
  setup.py | 2 +-
diff --git a/recipe/patches/0005-Fix-duplicate-linker-script.patch b/recipe/patches/0005-Fix-duplicate-linker-script.patch
index cb09dcdf..49e6d72b 100644
--- a/recipe/patches/0005-Fix-duplicate-linker-script.patch
+++ b/recipe/patches/0005-Fix-duplicate-linker-script.patch
@@ -1,7 +1,7 @@
 From 08a1f44fbc81324aa98d720dfb7b87a261923ac2 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee <jeongseok@meta.com>
 Date: Sun, 3 Nov 2024 01:12:36 -0700
-Subject: [PATCH 05/20] Fix duplicate linker script
+Subject: [PATCH 05/21] Fix duplicate linker script
 
 ---
  setup.py | 4 +++-
diff --git a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
index 326e6285..99baed0a 100644
--- a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
+++ b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
@@ -1,7 +1,7 @@
 From 15df314a41c69a31c0443254d5552aa1b39d708d Mon Sep 17 00:00:00 2001
 From: William Wen <williamwen@meta.com>
 Date: Fri, 13 Sep 2024 13:02:33 -0700
-Subject: [PATCH 06/20] fix 3.13 pickle error in serialization.py (#136034)
+Subject: [PATCH 06/21] fix 3.13 pickle error in serialization.py (#136034)
 
 Error encountered when adding dynamo 3.13 support.
 Pull Request resolved: https://github.com/pytorch/pytorch/pull/136034
diff --git a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
index ad215aa9..ae6a94cd 100644
--- a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
+++ b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
@@ -1,7 +1,7 @@
 From 655f694854c3eafdd631235b60bc6c1b279218ed Mon Sep 17 00:00:00 2001
 From: Mark Harfouche <mark.harfouche@gmail.com>
 Date: Thu, 3 Oct 2024 22:49:56 -0400
-Subject: [PATCH 07/20] Allow users to overwrite ld with environment variables
+Subject: [PATCH 07/21] Allow users to overwrite ld with environment variables
 
 This should help in the case of cross compilation.
 
diff --git a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
index fbfe0560..b52d1588 100644
--- a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
+++ b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
@@ -1,7 +1,7 @@
 From f03bf82d9da9cccb2cf4d4833c1a6349622dc37d Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
 Date: Wed, 27 Nov 2024 13:47:23 +0100
-Subject: [PATCH 08/20] Allow overriding CUDA-related paths
+Subject: [PATCH 08/21] Allow overriding CUDA-related paths
 
 ---
  cmake/Modules/FindCUDAToolkit.cmake | 2 +-
diff --git a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
index 580fe42a..7d9d1ab5 100644
--- a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
+++ b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
@@ -1,7 +1,7 @@
 From 4b1faf6ba142953ce2730766db44f8d98d161ef0 Mon Sep 17 00:00:00 2001
 From: Haifeng Jin <haifeng-jin@users.noreply.github.com>
 Date: Tue, 1 Oct 2024 07:53:24 +0000
-Subject: [PATCH 09/20] Fix test/test_linalg.py for NumPy 2 (#136800)
+Subject: [PATCH 09/21] Fix test/test_linalg.py for NumPy 2 (#136800)
 
 Related to  #107302.
 
diff --git a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
index 6495b150..c28fe93a 100644
--- a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
+++ b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
@@ -1,7 +1,7 @@
 From 032b9be9ca7f9ae174e75554cecc82600ea3ef54 Mon Sep 17 00:00:00 2001
 From: Haifeng Jin <haifeng-jin@users.noreply.github.com>
 Date: Sat, 12 Oct 2024 02:40:17 +0000
-Subject: [PATCH 10/20] Fixes NumPy 2 test failures in test_torch.py (#137740)
+Subject: [PATCH 10/21] Fixes NumPy 2 test failures in test_torch.py (#137740)
 
 Related to #107302
 
diff --git a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
index 193ce159..c7b201b6 100644
--- a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
+++ b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
@@ -1,7 +1,7 @@
 From 56f1528fa072023fb2724d5abf8790f2f6cc3aaa Mon Sep 17 00:00:00 2001
 From: Isuru Fernando <ifernando@quansight.com>
 Date: Wed, 18 Dec 2024 03:59:00 +0000
-Subject: [PATCH 11/20] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
+Subject: [PATCH 11/21] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
 
 There are two calling conventions for *dotu functions
 
diff --git a/recipe/patches/0012-fix-issue-142484.patch b/recipe/patches/0012-fix-issue-142484.patch
index 00f1e3d2..db13f7ac 100644
--- a/recipe/patches/0012-fix-issue-142484.patch
+++ b/recipe/patches/0012-fix-issue-142484.patch
@@ -1,7 +1,7 @@
 From beba58d724cc1bd7ca73660b0a5ad9e61ae0c562 Mon Sep 17 00:00:00 2001
 From: "Zheng, Zhaoqiong" <zhaoqiong.zheng@intel.com>
 Date: Fri, 27 Dec 2024 13:49:36 +0800
-Subject: [PATCH 12/20] fix issue 142484
+Subject: [PATCH 12/21] fix issue 142484
 
 From https://github.com/pytorch/pytorch/pull/143894
 ---
diff --git a/recipe/patches/0013-Fix-FindOpenBLAS.patch b/recipe/patches/0013-Fix-FindOpenBLAS.patch
index f539d0a6..49d43f90 100644
--- a/recipe/patches/0013-Fix-FindOpenBLAS.patch
+++ b/recipe/patches/0013-Fix-FindOpenBLAS.patch
@@ -1,7 +1,7 @@
 From 816a248a4425a97350959e412666e6db9012a52e Mon Sep 17 00:00:00 2001
 From: Bas Zalmstra <bas@prefix.dev>
 Date: Thu, 16 May 2024 10:46:49 +0200
-Subject: [PATCH 13/20] Fix FindOpenBLAS
+Subject: [PATCH 13/21] Fix FindOpenBLAS
 
 ---
  cmake/Modules/FindOpenBLAS.cmake | 15 +++++++++------
diff --git a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
index 7a2df88f..af808376 100644
--- a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
+++ b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
@@ -1,7 +1,7 @@
 From db896f927403f55a18f931b18a6469cb4e37d322 Mon Sep 17 00:00:00 2001
 From: atalman <atalman@fb.com>
 Date: Tue, 12 Nov 2024 12:28:10 +0000
-Subject: [PATCH 14/20] CD Enable Python 3.13 on windows (#138095)
+Subject: [PATCH 14/21] CD Enable Python 3.13 on windows (#138095)
 
 Adding CD windows. Part of: https://github.com/pytorch/pytorch/issues/130249
 Builder PR landed with smoke test: https://github.com/pytorch/builder/pull/2035
diff --git a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
index 3736ca78..6cf5ea9c 100644
--- a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
+++ b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
@@ -1,7 +1,7 @@
 From 33790dfbf966e7d8ea4ff6798d2ff92474d84079 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 23 Jan 2025 22:46:58 +1100
-Subject: [PATCH 15/20] simplify torch.utils.cpp_extension.include_paths; use
+Subject: [PATCH 15/21] simplify torch.utils.cpp_extension.include_paths; use
  it in cpp_builder
 
 The /TH headers have not existed since pytorch 1.11
diff --git a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
index 764e24af..ed6b74f6 100644
--- a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
+++ b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
@@ -1,7 +1,7 @@
 From 799f6fa59dac93dabbbcf72d46f4e1334e3d65d9 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 23 Jan 2025 22:58:14 +1100
-Subject: [PATCH 16/20] point include paths to $PREFIX/include
+Subject: [PATCH 16/21] point include paths to $PREFIX/include
 
 ---
  torch/utils/cpp_extension.py | 9 +++++++++
diff --git a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
index e2111c54..aff55f95 100644
--- a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
+++ b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
@@ -1,7 +1,7 @@
 From 9f73a02bacf9680833ac64657fde6762d33ab200 Mon Sep 17 00:00:00 2001
 From: Daniel Petry <dpetry@anaconda.com>
 Date: Tue, 21 Jan 2025 17:45:23 -0600
-Subject: [PATCH 17/20] Add conda prefix to inductor include paths
+Subject: [PATCH 17/21] Add conda prefix to inductor include paths
 
 Currently inductor doesn't look in conda's includes and libs. This results in
 errors when it tries to compile, if system versions are being used of
diff --git a/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch b/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
index 028d79be..426e6015 100644
--- a/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
+++ b/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
@@ -1,7 +1,7 @@
 From b0cfa0f728e96a3a9d6f7434e2c02d74d6daa9a9 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Tue, 28 Jan 2025 14:15:34 +1100
-Subject: [PATCH 18/20] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX
+Subject: [PATCH 18/21] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX
 
 we cannot set CMAKE_INSTALL_PREFIX without the pytorch build complaining, but we can
 use TORCH_INSTALL_PREFIX, which is set correctly relative to our CMake files already:
diff --git a/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch b/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
index 7aa41192..78950986 100644
--- a/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
+++ b/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
@@ -1,7 +1,7 @@
 From f7db4cbfb0af59027ed8bdcd0387dba6fbcb1192 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Tue, 28 Jan 2025 10:58:29 +1100
-Subject: [PATCH 19/20] remove `DESTINATION lib` from CMake `install(TARGETS`
+Subject: [PATCH 19/21] remove `DESTINATION lib` from CMake `install(TARGETS`
  directives
 
 Suggested-By: Silvio Traversaro <silvio@traversaro.it>
diff --git a/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch b/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
index 523d92cc..392ce265 100644
--- a/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
+++ b/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
@@ -1,7 +1,7 @@
 From 79ee5dcac30f2eba891af961f7649d15dfc6ce63 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 13:23:14 +1100
-Subject: [PATCH 20/20] make library name in
+Subject: [PATCH 20/21] make library name in
  `test_mutable_custom_op_fixed_layout{,2}` unique
 
 Suggested-By: Daniel Petry <dpetry@anaconda.com>
diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
new file mode 100644
index 00000000..690dd90f
--- /dev/null
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -0,0 +1,216 @@
+From befd57c110928ddaebec95cb54b6e6f2e3df2e22 Mon Sep 17 00:00:00 2001
+From: "H. Vetinari" <h.vetinari@gmx.com>
+Date: Thu, 30 Jan 2025 08:33:44 +1100
+Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
+ metadata
+
+---
+ caffe2/CMakeLists.txt      | 14 ++++++-------
+ cmake/Summary.cmake        | 10 +++++-----
+ cmake/TorchConfig.cmake.in |  2 +-
+ cmake/public/cuda.cmake    | 40 +++++++++++++++-----------------------
+ 4 files changed, 29 insertions(+), 37 deletions(-)
+
+diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
+index b51c7cc637b..6e107b5b02a 100644
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -906,25 +906,25 @@ if(USE_ROCM)
+         "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
+   endif()
+ elseif(USE_CUDA)
+-  set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
++  set(CUDAToolkit_LINK_LIBRARIES_KEYWORD PRIVATE)
+   list(APPEND Caffe2_GPU_SRCS ${GENERATED_CXX_TORCH_CUDA})
+-  if(CUDA_SEPARABLE_COMPILATION)
++  if(CUDAToolkit_SEPARABLE_COMPILATION)
+     # Separate compilation fails when kernels using `thrust::sort_by_key`
+     # are linked with the rest of CUDA code. Workaround by linking them separately.
+     add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_CU_SRCS})
+-    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
++    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+ 
+     add_library(torch_cuda_w_sort_by_key OBJECT
+         ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
+         ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
+-    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
++    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
+     target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
+   else()
+     add_library(torch_cuda
+         ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
+         ${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
+   endif()
+-  set(CUDA_LINK_LIBRARIES_KEYWORD)
++  set(CUDAToolkit_LINK_LIBRARIES_KEYWORD)
+   torch_compile_options(torch_cuda)  # see cmake/public/utils.cmake
+   target_compile_definitions(torch_cuda PRIVATE USE_CUDA)
+ 
+@@ -973,12 +973,12 @@ elseif(USE_CUDA)
+         torch_cuda
+     )
+     if($ENV{ATEN_STATIC_CUDA})
+-      if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
++      if(CUDAToolkit_VERSION_MAJOR LESS_EQUAL 11)
+         target_link_libraries(torch_cuda_linalg PRIVATE
+             CUDA::cusolver_static
+             ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a     # needed for libcusolver_static
+         )
+-      elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
++      elseif(CUDAToolkit_VERSION_MAJOR GREATER_EQUAL 12)
+         target_link_libraries(torch_cuda_linalg PRIVATE
+             CUDA::cusolver_static
+             ${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a     # needed for libcusolver_static
+diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
+index d51c451589c..154f04a89dd 100644
+--- a/cmake/Summary.cmake
++++ b/cmake/Summary.cmake
+@@ -76,7 +76,7 @@ function(caffe2_print_configuration_summary)
+     message(STATUS "    USE_CUSPARSELT      : ${USE_CUSPARSELT}")
+     message(STATUS "    USE_CUDSS           : ${USE_CUDSS}")
+     message(STATUS "    USE_CUFILE          : ${USE_CUFILE}")
+-    message(STATUS "    CUDA version        : ${CUDA_VERSION}")
++    message(STATUS "    CUDA version        : ${CUDAToolkit_VERSION}")
+     message(STATUS "    USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}")
+     message(STATUS "    USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}")
+     if(${USE_CUDNN})
+@@ -88,7 +88,7 @@ function(caffe2_print_configuration_summary)
+     if(${USE_CUFILE})
+       message(STATUS "    cufile library    : ${CUDA_cuFile_LIBRARY}")
+     endif()
+-    message(STATUS "    CUDA root directory : ${CUDA_TOOLKIT_ROOT_DIR}")
++    message(STATUS "    CUDA root directory : ${CUDAToolkit_ROOT}")
+     message(STATUS "    CUDA library        : ${CUDA_cuda_driver_LIBRARY}")
+     message(STATUS "    cudart library      : ${CUDA_cudart_LIBRARY}")
+     message(STATUS "    cublas library      : ${CUDA_cublas_LIBRARY}")
+@@ -108,12 +108,12 @@ function(caffe2_print_configuration_summary)
+       message(STATUS "    cuDSS library       : ${__tmp}")
+     endif()
+     message(STATUS "    nvrtc               : ${CUDA_nvrtc_LIBRARY}")
+-    message(STATUS "    CUDA include path   : ${CUDA_INCLUDE_DIRS}")
+-    message(STATUS "    NVCC executable     : ${CUDA_NVCC_EXECUTABLE}")
++    message(STATUS "    CUDA include path   : ${CUDAToolkit_INCLUDE_DIRS}")
++    message(STATUS "    NVCC executable     : ${CUDAToolkit_NVCC_EXECUTABLE}")
+     message(STATUS "    CUDA compiler       : ${CMAKE_CUDA_COMPILER}")
+     message(STATUS "    CUDA flags          : ${CMAKE_CUDA_FLAGS}")
+     message(STATUS "    CUDA host compiler  : ${CMAKE_CUDA_HOST_COMPILER}")
+-    message(STATUS "    CUDA --device-c     : ${CUDA_SEPARABLE_COMPILATION}")
++    message(STATUS "    CUDA --device-c     : ${CUDAToolkit_SEPARABLE_COMPILATION}")
+     message(STATUS "    USE_TENSORRT        : ${USE_TENSORRT}")
+     if(${USE_TENSORRT})
+       message(STATUS "      TensorRT runtime library: ${TENSORRT_LIBRARY}")
+diff --git a/cmake/TorchConfig.cmake.in b/cmake/TorchConfig.cmake.in
+index cba4d929855..da904fc6a18 100644
+--- a/cmake/TorchConfig.cmake.in
++++ b/cmake/TorchConfig.cmake.in
+@@ -125,7 +125,7 @@ if(@USE_CUDA@)
+     find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib")
+     list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY})
+   else()
+-    set(TORCH_CUDA_LIBRARIES ${CUDA_NVRTC_LIB})
++    set(TORCH_CUDA_LIBRARIES CUDA::nvrtc)
+   endif()
+   if(TARGET torch::nvtoolsext)
+     list(APPEND TORCH_CUDA_LIBRARIES torch::nvtoolsext)
+diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
+index 152fbdbe6dd..13bae9b6227 100644
+--- a/cmake/public/cuda.cmake
++++ b/cmake/public/cuda.cmake
+@@ -26,8 +26,8 @@ if(NOT MSVC)
+ endif()
+ 
+ # Find CUDA.
+-find_package(CUDA)
+-if(NOT CUDA_FOUND)
++find_package(CUDAToolkit)
++if(NOT CUDAToolkit_FOUND)
+   message(WARNING
+     "Caffe2: CUDA cannot be found. Depending on whether you are building "
+     "Caffe2 or a Caffe2 dependent library, the next warning / error will "
+@@ -36,8 +36,6 @@ if(NOT CUDA_FOUND)
+   return()
+ endif()
+ 
+-# Enable CUDA language support
+-set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
+ # Pass clang as host compiler, which according to the docs
+ # Must be done before CUDA language is enabled, see
+ # https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
+@@ -56,24 +54,18 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0)
+   cmake_policy(SET CMP0074 NEW)
+ endif()
+ 
+-find_package(CUDAToolkit REQUIRED)
++find_package(CUDAToolkit REQUIRED COMPONENTS cudart nvrtc)
+ 
+ cmake_policy(POP)
+ 
+-if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
+-  message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
+-                      "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
+-                      "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
+-endif()
+-
+-message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
+-message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
+-message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
+-if(CUDA_VERSION VERSION_LESS 11.0)
++message(STATUS "Caffe2: CUDA detected: " ${CUDAToolkit_VERSION})
++message(STATUS "Caffe2: CUDA nvcc is: " ${CUDAToolkit_NVCC_EXECUTABLE})
++message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDAToolkit_ROOT})
++if(CUDAToolkit_VERSION VERSION_LESS 11.0)
+   message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
+ endif()
+ 
+-if(CUDA_FOUND)
++if(CUDAToolkit_FOUND)
+   # Sometimes, we may mismatch nvcc with the CUDA headers we are
+   # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
+   # but the PATH is not consistent with CUDA_HOME.  It's better safe
+@@ -97,8 +89,8 @@ if(CUDA_FOUND)
+     )
+   if(NOT CMAKE_CROSSCOMPILING)
+     try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
+-      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
+-      LINK_LIBRARIES ${CUDA_LIBRARIES}
++      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDAToolkit_INCLUDE_DIRS}"
++      LINK_LIBRARIES CUDA::cudart
+       RUN_OUTPUT_VARIABLE cuda_version_from_header
+       COMPILE_OUTPUT_VARIABLE output_var
+       )
+@@ -106,20 +98,20 @@ if(CUDA_FOUND)
+       message(FATAL_ERROR "Caffe2: Couldn't determine version from header: " ${output_var})
+     endif()
+     message(STATUS "Caffe2: Header version is: " ${cuda_version_from_header})
+-    if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING})
++    if(NOT cuda_version_from_header STREQUAL "${CUDAToolkit_VERSION}")
+       # Force CUDA to be processed for again next time
+       # TODO: I'm not sure if this counts as an implementation detail of
+       # FindCUDA
+-      set(${cuda_version_from_findcuda} ${CUDA_VERSION_STRING})
++      set(${cuda_version_from_findcuda} "${CUDAToolkit_VERSION}")
+       unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
+       # Not strictly necessary, but for good luck.
+-      unset(CUDA_VERSION CACHE)
++      unset(CUDAToolkit_VERSION CACHE)
+       # Error out
+       message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
+         "but the CUDA headers say the version is ${cuda_version_from_header}.  This often occurs "
+         "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
+         "non-standard locations, without also setting PATH to point to the correct nvcc.  "
+-        "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH.  "
++        "Perhaps, try re-running this command again with PATH=${CUDAToolkit_ROOT}/bin:$PATH.  "
+         "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
+     endif()
+   endif()
+@@ -128,8 +120,8 @@ endif()
+ # ---[ CUDA libraries wrapper
+ 
+ # find lbnvrtc.so
+-set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
+-if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
++get_target_property(CUDA_NVRTC_LIB CUDA::nvrtc IMPORTED_LOCATION)
++if(NOT CUDA_NVRTC_SHORTHASH)
+   find_package(Python COMPONENTS Interpreter)
+   execute_process(
+     COMMAND Python::Interpreter -c

From 847f7b10e080eb93b84b47c33f24bc61030c984e Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 30 Jan 2025 23:33:17 +1100
Subject: [PATCH 06/25] delete an unnecessary check

---
 ...-find_package-CUDA-in-caffe2-CMake-m.patch | 45 +++++++++----------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
index 690dd90f..2f066a39 100644
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -1,15 +1,15 @@
-From befd57c110928ddaebec95cb54b6e6f2e3df2e22 Mon Sep 17 00:00:00 2001
+From 012e72268018aeb5d728898222e19634f131762a Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 08:33:44 +1100
 Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
  metadata
 
 ---
- caffe2/CMakeLists.txt      | 14 ++++++-------
- cmake/Summary.cmake        | 10 +++++-----
+ caffe2/CMakeLists.txt      | 14 +++++------
+ cmake/Summary.cmake        | 10 ++++----
  cmake/TorchConfig.cmake.in |  2 +-
- cmake/public/cuda.cmake    | 40 +++++++++++++++-----------------------
- 4 files changed, 29 insertions(+), 37 deletions(-)
+ cmake/public/cuda.cmake    | 48 ++++++++++----------------------------
+ 4 files changed, 25 insertions(+), 49 deletions(-)
 
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
 index b51c7cc637b..6e107b5b02a 100644
@@ -113,7 +113,7 @@ index cba4d929855..da904fc6a18 100644
    if(TARGET torch::nvtoolsext)
      list(APPEND TORCH_CUDA_LIBRARIES torch::nvtoolsext)
 diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
-index 152fbdbe6dd..13bae9b6227 100644
+index 152fbdbe6dd..50d37f4d6d0 100644
 --- a/cmake/public/cuda.cmake
 +++ b/cmake/public/cuda.cmake
 @@ -26,8 +26,8 @@ if(NOT MSVC)
@@ -178,32 +178,29 @@ index 152fbdbe6dd..13bae9b6227 100644
        RUN_OUTPUT_VARIABLE cuda_version_from_header
        COMPILE_OUTPUT_VARIABLE output_var
        )
-@@ -106,20 +98,20 @@ if(CUDA_FOUND)
+@@ -106,30 +98,14 @@ if(CUDA_FOUND)
        message(FATAL_ERROR "Caffe2: Couldn't determine version from header: " ${output_var})
      endif()
      message(STATUS "Caffe2: Header version is: " ${cuda_version_from_header})
 -    if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING})
-+    if(NOT cuda_version_from_header STREQUAL "${CUDAToolkit_VERSION}")
-       # Force CUDA to be processed for again next time
-       # TODO: I'm not sure if this counts as an implementation detail of
-       # FindCUDA
+-      # Force CUDA to be processed for again next time
+-      # TODO: I'm not sure if this counts as an implementation detail of
+-      # FindCUDA
 -      set(${cuda_version_from_findcuda} ${CUDA_VERSION_STRING})
-+      set(${cuda_version_from_findcuda} "${CUDAToolkit_VERSION}")
-       unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
-       # Not strictly necessary, but for good luck.
+-      unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
+-      # Not strictly necessary, but for good luck.
 -      unset(CUDA_VERSION CACHE)
-+      unset(CUDAToolkit_VERSION CACHE)
-       # Error out
-       message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
-         "but the CUDA headers say the version is ${cuda_version_from_header}.  This often occurs "
-         "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
-         "non-standard locations, without also setting PATH to point to the correct nvcc.  "
+-      # Error out
+-      message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
+-        "but the CUDA headers say the version is ${cuda_version_from_header}.  This often occurs "
+-        "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
+-        "non-standard locations, without also setting PATH to point to the correct nvcc.  "
 -        "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH.  "
-+        "Perhaps, try re-running this command again with PATH=${CUDAToolkit_ROOT}/bin:$PATH.  "
-         "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
-     endif()
+-        "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
+-    endif()
    endif()
-@@ -128,8 +120,8 @@ endif()
+ endif()
+ 
  # ---[ CUDA libraries wrapper
  
  # find lbnvrtc.so

From 9a36bd4d665ea781429d09899e72461b714ed212 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 31 Jan 2025 07:50:07 +1100
Subject: [PATCH 07/25] vendor CMake's cuda_select_nvcc_arch_flags

---
 recipe/meta.yaml                              |   1 +
 ...-find_package-CUDA-in-caffe2-CMake-m.patch | 149 +++++++++++++++++-
 recipe/third_party/CMake/Copyright.txt        | 136 ++++++++++++++++
 3 files changed, 280 insertions(+), 6 deletions(-)
 create mode 100644 recipe/third_party/CMake/Copyright.txt

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index b32271a6..9b560039 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -558,6 +558,7 @@ about:
   license_file:
     - LICENSE
     - NOTICE
+    - third_party/CMake/Copyright.txt
   summary: PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
   description: |
     PyTorch is a Python package that provides two high-level features:
diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
index 2f066a39..eff7150b 100644
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -1,15 +1,17 @@
-From 012e72268018aeb5d728898222e19634f131762a Mon Sep 17 00:00:00 2001
+From 6ec9f719d4e0cc1f572fc334fbbfac89a3c2b7a9 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 08:33:44 +1100
 Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
  metadata
 
+vendor the no-longer-available function CUDA_SELECT_NVCC_ARCH_FLAGS from CMake's FindCUDA
 ---
- caffe2/CMakeLists.txt      | 14 +++++------
- cmake/Summary.cmake        | 10 ++++----
- cmake/TorchConfig.cmake.in |  2 +-
- cmake/public/cuda.cmake    | 48 ++++++++++----------------------------
- 4 files changed, 25 insertions(+), 49 deletions(-)
+ caffe2/CMakeLists.txt      |  14 ++---
+ cmake/Summary.cmake        |  10 +--
+ cmake/TorchConfig.cmake.in |   2 +-
+ cmake/public/cuda.cmake    |  48 ++++----------
+ cmake/public/utils.cmake   | 124 +++++++++++++++++++++++++++++++++++++
+ 5 files changed, 149 insertions(+), 49 deletions(-)
 
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
 index b51c7cc637b..6e107b5b02a 100644
@@ -211,3 +213,138 @@ index 152fbdbe6dd..50d37f4d6d0 100644
    find_package(Python COMPONENTS Interpreter)
    execute_process(
      COMMAND Python::Interpreter -c
+diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
+index c6647eb457c..de066549f26 100644
+--- a/cmake/public/utils.cmake
++++ b/cmake/public/utils.cmake
+@@ -306,6 +306,130 @@ macro(torch_hip_get_arch_list store_var)
+   string(REPLACE " " ";" ${store_var} "${_TMP}")
+ endmacro()
+ 
++# CUDA_SELECT_NVCC_ARCH_FLAGS is part of find_package(CUDA), but not find_package(CUDAToolkit);
++# vendor it from https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA/select_compute_arch.cmake
++################################################################################################
++# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
++# Usage:
++#   SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs])
++function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
++  set(CUDA_ARCH_LIST "${ARGN}")
++
++  if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
++    set(CUDA_ARCH_LIST "Auto")
++  endif()
++
++  set(cuda_arch_bin)
++  set(cuda_arch_ptx)
++
++  if("${CUDA_ARCH_LIST}" STREQUAL "All")
++    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
++  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
++    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
++  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
++    CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
++    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
++  endif()
++
++  # Now process the list and look for names
++  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
++  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
++  foreach(arch_name ${CUDA_ARCH_LIST})
++    set(arch_bin)
++    set(arch_ptx)
++    set(add_ptx FALSE)
++    # Check to see if we are compiling PTX
++    if(arch_name MATCHES "(.*)\\+PTX$")
++      set(add_ptx TRUE)
++      set(arch_name ${CMAKE_MATCH_1})
++    endif()
++    if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
++      set(arch_bin ${CMAKE_MATCH_1})
++      set(arch_ptx ${arch_bin})
++    else()
++      # Look for it in our list of known architectures
++      if(${arch_name} STREQUAL "Fermi")
++        set(arch_bin 2.0 "2.1(2.0)")
++      elseif(${arch_name} STREQUAL "Kepler+Tegra")
++        set(arch_bin 3.2)
++      elseif(${arch_name} STREQUAL "Kepler+Tesla")
++        set(arch_bin 3.7)
++      elseif(${arch_name} STREQUAL "Kepler")
++        set(arch_bin 3.0 3.5)
++        set(arch_ptx 3.5)
++      elseif(${arch_name} STREQUAL "Maxwell+Tegra")
++        set(arch_bin 5.3)
++      elseif(${arch_name} STREQUAL "Maxwell")
++        set(arch_bin 5.0 5.2)
++        set(arch_ptx 5.2)
++      elseif(${arch_name} STREQUAL "Pascal")
++        set(arch_bin 6.0 6.1)
++        set(arch_ptx 6.1)
++      elseif(${arch_name} STREQUAL "Volta")
++        set(arch_bin 7.0 7.0)
++        set(arch_ptx 7.0)
++      elseif(${arch_name} STREQUAL "Turing")
++        set(arch_bin 7.5)
++        set(arch_ptx 7.5)
++      elseif(${arch_name} STREQUAL "Ampere")
++        set(arch_bin 8.0)
++        set(arch_ptx 8.0)
++      else()
++        message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS")
++      endif()
++    endif()
++    if(NOT arch_bin)
++      message(SEND_ERROR "arch_bin wasn't set for some reason")
++    endif()
++    list(APPEND cuda_arch_bin ${arch_bin})
++    if(add_ptx)
++      if (NOT arch_ptx)
++        set(arch_ptx ${arch_bin})
++      endif()
++      list(APPEND cuda_arch_ptx ${arch_ptx})
++    endif()
++  endforeach()
++
++  # remove dots and convert to lists
++  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
++  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
++  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
++  string(REGEX MATCHALL "[0-9]+"   cuda_arch_ptx "${cuda_arch_ptx}")
++
++  if(cuda_arch_bin)
++    list(REMOVE_DUPLICATES cuda_arch_bin)
++  endif()
++  if(cuda_arch_ptx)
++    list(REMOVE_DUPLICATES cuda_arch_ptx)
++  endif()
++
++  set(nvcc_flags "")
++  set(nvcc_archs_readable "")
++
++  # Tell NVCC to add binaries for the specified GPUs
++  foreach(arch ${cuda_arch_bin})
++    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
++      # User explicitly specified ARCH for the concrete CODE
++      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
++      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
++    else()
++      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
++      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
++      list(APPEND nvcc_archs_readable sm_${arch})
++    endif()
++  endforeach()
++
++  # Tell NVCC to add PTX intermediate code for the specified architectures
++  foreach(arch ${cuda_arch_ptx})
++    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
++    list(APPEND nvcc_archs_readable compute_${arch})
++  endforeach()
++
++  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
++  set(${out_variable}          ${nvcc_flags}          PARENT_SCOPE)
++  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
++endfunction()
++
+ ##############################################################################
+ # Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
+ # Usage:
diff --git a/recipe/third_party/CMake/Copyright.txt b/recipe/third_party/CMake/Copyright.txt
new file mode 100644
index 00000000..f32a818a
--- /dev/null
+++ b/recipe/third_party/CMake/Copyright.txt
@@ -0,0 +1,136 @@
+CMake - Cross Platform Makefile Generator
+Copyright 2000-2025 Kitware, Inc. and Contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of Kitware, Inc. nor the names of Contributors
+  may be used to endorse or promote products derived from this
+  software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+------------------------------------------------------------------------------
+
+The following individuals and institutions are among the Contributors:
+
+* Aaron C. Meadows <cmake@shadowguarddev.com>
+* Adriaan de Groot <groot@kde.org>
+* Aleksey Avdeev <solo@altlinux.ru>
+* Alexander Neundorf <neundorf@kde.org>
+* Alexander Smorkalov <alexander.smorkalov@itseez.com>
+* Alexey Sokolov <sokolov@google.com>
+* Alex Merry <alex.merry@kde.org>
+* Alex Turbov <i.zaufi@gmail.com>
+* Andreas Pakulat <apaku@gmx.de>
+* Andreas Schneider <asn@cryptomilk.org>
+* André Rigland Brodtkorb <Andre.Brodtkorb@ifi.uio.no>
+* Axel Huebl, Helmholtz-Zentrum Dresden - Rossendorf
+* Benjamin Eikel
+* Bjoern Ricks <bjoern.ricks@gmail.com>
+* Brad Hards <bradh@kde.org>
+* Christopher Harvey
+* Christoph Grüninger <foss@grueninger.de>
+* Clement Creusot <creusot@cs.york.ac.uk>
+* Daniel Blezek <blezek@gmail.com>
+* Daniel Pfeifer <daniel@pfeifer-mail.de>
+* Dawid Wróbel <me@dawidwrobel.com>
+* Enrico Scholz <enrico.scholz@informatik.tu-chemnitz.de>
+* Eran Ifrah <eran.ifrah@gmail.com>
+* Esben Mose Hansen, Ange Optimization ApS
+* Geoffrey Viola <geoffrey.viola@asirobots.com>
+* Google Inc
+* Gregor Jasny
+* Helio Chissini de Castro <helio@kde.org>
+* Ilya Lavrenov <ilya.lavrenov@itseez.com>
+* Insight Software Consortium <insightsoftwareconsortium.org>
+* Intel Corporation <www.intel.com>
+* Jan Woetzel
+* Jordan Williams <jordan@jwillikers.com>
+* Julien Schueller
+* Kelly Thompson <kgt@lanl.gov>
+* Konstantin Podsvirov <konstantin@podsvirov.pro>
+* Laurent Montel <montel@kde.org>
+* Mario Bensi <mbensi@ipsquad.net>
+* Martin Gräßlin <mgraesslin@kde.org>
+* Mathieu Malaterre <mathieu.malaterre@gmail.com>
+* Matthaeus G. Chajdas
+* Matthias Kretz <kretz@kde.org>
+* Matthias Maennich <matthias@maennich.net>
+* Michael Hirsch, Ph.D. <www.scivision.co>
+* Michael Stürmer
+* Miguel A. Figueroa-Villanueva
+* Mike Durso <rbprogrammer@gmail.com>
+* Mike Jackson
+* Mike McQuaid <mike@mikemcquaid.com>
+* Nicolas Bock <nicolasbock@gmail.com>
+* Nicolas Despres <nicolas.despres@gmail.com>
+* Nikita Krupen'ko <krnekit@gmail.com>
+* NVIDIA Corporation <www.nvidia.com>
+* OpenGamma Ltd. <opengamma.com>
+* Patrick Stotko <stotko@cs.uni-bonn.de>
+* Per Øyvind Karlsen <peroyvind@mandriva.org>
+* Peter Collingbourne <peter@pcc.me.uk>
+* Petr Gotthard <gotthard@honeywell.com>
+* Philip Lowman <philip@yhbt.com>
+* Philippe Proulx <pproulx@efficios.com>
+* Raffi Enficiaud, Max Planck Society
+* Raumfeld <raumfeld.com>
+* Roger Leigh <rleigh@codelibre.net>
+* Rolf Eike Beer <eike@sf-mail.de>
+* Roman Donchenko <roman.donchenko@itseez.com>
+* Roman Kharitonov <roman.kharitonov@itseez.com>
+* Ruslan Baratov
+* Sebastian Holtermann <sebholt@xwmw.org>
+* Stephen Kelly <steveire@gmail.com>
+* Sylvain Joubert <joubert.sy@gmail.com>
+* The Qt Company Ltd.
+* Thomas Sondergaard <ts@medical-insight.com>
+* Tobias Hunger <tobias.hunger@qt.io>
+* Todd Gamblin <tgamblin@llnl.gov>
+* Tristan Carel
+* University of Dundee
+* Vadim Zhukov
+* Will Dicharry <wdicharry@stellarscience.com>
+
+See version control history for details of individual contributions.
+
+The above copyright and license notice applies to distributions of
+CMake in source and binary form.  Third-party software packages supplied
+with CMake under compatible licenses provide their own copyright notices
+documented in corresponding subdirectories or source files.
+
+------------------------------------------------------------------------------
+
+CMake was initially developed by Kitware with the following sponsorship:
+
+ * National Library of Medicine at the National Institutes of Health
+   as part of the Insight Segmentation and Registration Toolkit (ITK).
+
+ * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel
+   Visualization Initiative.
+
+ * National Alliance for Medical Image Computing (NAMIC) is funded by the
+   National Institutes of Health through the NIH Roadmap for Medical Research,
+   Grant U54 EB005149.
+
+ * Kitware, Inc.

From 32527dc1a26c88164cfd6739ccae3b42df0e7a5a Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 31 Jan 2025 08:37:57 +1100
Subject: [PATCH 08/25] fix a casing error in CMake

---
 ...oid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
index eff7150b..428f5fd2 100644
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -1,4 +1,4 @@
-From 6ec9f719d4e0cc1f572fc334fbbfac89a3c2b7a9 Mon Sep 17 00:00:00 2001
+From 3bbff0ff28062788f8d05c5c24323de317911375 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 08:33:44 +1100
 Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
@@ -115,7 +115,7 @@ index cba4d929855..da904fc6a18 100644
    if(TARGET torch::nvtoolsext)
      list(APPEND TORCH_CUDA_LIBRARIES torch::nvtoolsext)
 diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
-index 152fbdbe6dd..50d37f4d6d0 100644
+index 152fbdbe6dd..0d1aeffc59f 100644
 --- a/cmake/public/cuda.cmake
 +++ b/cmake/public/cuda.cmake
 @@ -26,8 +26,8 @@ if(NOT MSVC)
@@ -208,7 +208,7 @@ index 152fbdbe6dd..50d37f4d6d0 100644
  # find lbnvrtc.so
 -set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
 -if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
-+get_target_property(CUDA_NVRTC_LIB cuda::nvrtc INTERFACE_LINK_LIBRARIES)
++get_target_property(CUDA_NVRTC_LIB CUDA::nvrtc INTERFACE_LINK_LIBRARIES)
 +if(NOT CUDA_NVRTC_SHORTHASH)
    find_package(Python COMPONENTS Interpreter)
    execute_process(

From 2a0827b6d00652888dba397c5fb3618bfe05ae27 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 31 Jan 2025 08:38:01 +1100
Subject: [PATCH 09/25] add zlib

---
 recipe/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 9b560039..febcc425 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -181,6 +181,7 @@ requirements:
     - typing_extensions
     - pybind11
     - eigen
+    - zlib
   run:
     # GPU requirements without run_exports
     - {{ pin_compatible('cudnn') }}                       # [cuda_compiler_version != "None"]

From 1ee54fb3f9144bc4e3a1e0ae2eef9fb1e49162e9 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 31 Jan 2025 22:26:17 +1100
Subject: [PATCH 10/25] add cuda-nvrtc-dev; the CMake config now needs to find it

---
 recipe/meta.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index febcc425..ed09314e 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -211,7 +211,9 @@ test:
     # cmake needs a compiler to run package detection, see
     # https://discourse.cmake.org/t/questions-about-find-package-cli-msvc/6194
     - {{ compiler('cxx') }}
+    # for CMake config to find cuda & nvrtc
     - {{ compiler('cuda') }}    # [cuda_compiler_version != "None"]
+    - cuda-nvrtc-dev            # [cuda_compiler_version != "None"]
     - cmake
     - ninja
     - pkg-config

From f35c9aa699000408a6dba4bb8ebcce28d8670a5a Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sat, 1 Feb 2025 06:40:35 +1100
Subject: [PATCH 11/25] clean up an old CMake variable in setup.py

---
 ...d-find_package-CUDA-in-caffe2-CMake-m.patch | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
index 428f5fd2..280e7028 100644
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -1,4 +1,4 @@
-From 3bbff0ff28062788f8d05c5c24323de317911375 Mon Sep 17 00:00:00 2001
+From 4b910a5445f063dddb88980c480a332d44d28b38 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 08:33:44 +1100
 Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
@@ -11,7 +11,8 @@ vendor the not-available-anymore function torch_cuda_get_nvcc_gencode_flag from
  cmake/TorchConfig.cmake.in |   2 +-
  cmake/public/cuda.cmake    |  48 ++++----------
  cmake/public/utils.cmake   | 124 +++++++++++++++++++++++++++++++++++++
- 5 files changed, 149 insertions(+), 49 deletions(-)
+ setup.py                   |   2 +-
+ 6 files changed, 150 insertions(+), 50 deletions(-)
 
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
 index b51c7cc637b..6e107b5b02a 100644
@@ -348,3 +349,16 @@ index c6647eb457c..de066549f26 100644
  ##############################################################################
  # Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
  # Usage:
+diff --git a/setup.py b/setup.py
+index b0e01e0d1ee..8d939b5ba63 100644
+--- a/setup.py
++++ b/setup.py
+@@ -627,7 +627,7 @@ class build_ext(setuptools.command.build_ext.build_ext):
+         else:
+             report("-- Not using cuDNN")
+         if cmake_cache_vars["USE_CUDA"]:
+-            report("-- Detected CUDA at " + cmake_cache_vars["CUDA_TOOLKIT_ROOT_DIR"])
++            report("-- Detected CUDA at " + cmake_cache_vars["CUDAToolkit_Root"])
+         else:
+             report("-- Not using CUDA")
+         if cmake_cache_vars["USE_XPU"]:

From 0d8670906d537b3954905c3ad95415cae5272fa0 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sat, 1 Feb 2025 07:10:53 +1100
Subject: [PATCH 12/25] disable CUDA_DETECT_INSTALLED_GPUS in vendored CMake
 function

---
 ...-find_package-CUDA-in-caffe2-CMake-m.patch | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
index 280e7028..debfba95 100644
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -1,4 +1,4 @@
-From 4b910a5445f063dddb88980c480a332d44d28b38 Mon Sep 17 00:00:00 2001
+From 34a6b30fbb914ca3f702b66b8fbf8ddd7934dbf4 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 08:33:44 +1100
 Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
@@ -6,13 +6,13 @@ Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
 
 vendor the not-available-anymore function torch_cuda_get_nvcc_gencode_flag from CMake
 ---
- caffe2/CMakeLists.txt      |  14 ++---
+ caffe2/CMakeLists.txt      |  14 ++--
  cmake/Summary.cmake        |  10 +--
  cmake/TorchConfig.cmake.in |   2 +-
  cmake/public/cuda.cmake    |  48 ++++----------
- cmake/public/utils.cmake   | 124 +++++++++++++++++++++++++++++++++++++
+ cmake/public/utils.cmake   | 127 +++++++++++++++++++++++++++++++++++++
  setup.py                   |   2 +-
- 6 files changed, 150 insertions(+), 50 deletions(-)
+ 6 files changed, 153 insertions(+), 50 deletions(-)
 
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
 index b51c7cc637b..6e107b5b02a 100644
@@ -215,15 +215,16 @@ index 152fbdbe6dd..0d1aeffc59f 100644
    execute_process(
      COMMAND Python::Interpreter -c
 diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
-index c6647eb457c..de066549f26 100644
+index c6647eb457c..accebfd3457 100644
 --- a/cmake/public/utils.cmake
 +++ b/cmake/public/utils.cmake
-@@ -306,6 +306,130 @@ macro(torch_hip_get_arch_list store_var)
+@@ -306,6 +306,133 @@ macro(torch_hip_get_arch_list store_var)
    string(REPLACE " " ";" ${store_var} "${_TMP}")
  endmacro()
  
 +# torch_cuda_get_nvcc_gencode_flag is part of find_package(CUDA), but not find_package(CUDAToolkit);
 +# vendor it from https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA/select_compute_arch.cmake
++# but disable CUDA_DETECT_INSTALLED_GPUS
 +################################################################################################
 +# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
 +# Usage:
@@ -243,8 +244,10 @@ index c6647eb457c..de066549f26 100644
 +  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
 +    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
 +  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
-+    CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
-+    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
++    # disabled, replaced by common architectures
++    # CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
++    # message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
++    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
 +  endif()
 +
 +  # Now process the list and look for names

From fc3fa85d55e39e6b7cc7b17087e5c869636b28dd Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sat, 1 Feb 2025 16:30:10 +1100
Subject: [PATCH 13/25] set CUDAToolkit_ROOT so that CMake cache gets populated
 correctly

---
 recipe/build.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/build.sh b/recipe/build.sh
index 648763a1..4ae0e644 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -177,10 +177,10 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
     export CUDAToolkit_BIN_DIR=${BUILD_PREFIX}/bin
     export CUDAToolkit_ROOT_DIR=${PREFIX}
     if [[ "${target_platform}" != "${build_platform}" ]]; then
-        export CUDA_TOOLKIT_ROOT=${PREFIX}
+        export CUDAToolkit_ROOT=${PREFIX}
     fi
     # for CUPTI
-    export CUDA_TOOLKIT_ROOT_DIR=${PREFIX}
+    export CUDAToolkit_ROOT=${PREFIX}
     case ${target_platform} in
         linux-64)
             export CUDAToolkit_TARGET_DIR=${PREFIX}/targets/x86_64-linux

From 03cb0fec16eeb225bb8a00f53c639a9db738a311 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sat, 1 Feb 2025 22:20:36 +1100
Subject: [PATCH 14/25] use computed variable for looking in `CMakeCache.txt`

ambient environment variables don't get registered in the cache, even if they
end up influencing the configuration; try to look up derived quantities, see
https://github.com/Kitware/CMake/blob/master/Modules/FindCUDAToolkit.cmake
---
 ...oid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
index debfba95..b21209a5 100644
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -1,4 +1,4 @@
-From 34a6b30fbb914ca3f702b66b8fbf8ddd7934dbf4 Mon Sep 17 00:00:00 2001
+From 3f643096358ed552ef5e97ad50d7a48d1c970e73 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 08:33:44 +1100
 Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake
@@ -353,7 +353,7 @@ index c6647eb457c..accebfd3457 100644
  # Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
  # Usage:
 diff --git a/setup.py b/setup.py
-index b0e01e0d1ee..8d939b5ba63 100644
+index b0e01e0d1ee..dc21f91d69e 100644
 --- a/setup.py
 +++ b/setup.py
 @@ -627,7 +627,7 @@ class build_ext(setuptools.command.build_ext.build_ext):
@@ -361,7 +361,7 @@ index b0e01e0d1ee..8d939b5ba63 100644
              report("-- Not using cuDNN")
          if cmake_cache_vars["USE_CUDA"]:
 -            report("-- Detected CUDA at " + cmake_cache_vars["CUDA_TOOLKIT_ROOT_DIR"])
-+            report("-- Detected CUDA at " + cmake_cache_vars["CUDAToolkit_Root"])
++            report(f"-- Detected CUDA at {cmake_cache_vars['CMAKE_CUDA_COMPILER_TOOLKIT_ROOT']}")
          else:
              report("-- Not using CUDA")
          if cmake_cache_vars["USE_XPU"]:

From 9d80394c8b9199138f0e26c65c432627f4f119a0 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sat, 1 Feb 2025 22:21:51 +1100
Subject: [PATCH 15/25] bump build number

---
 recipe/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index ed09314e..e3b8b816 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,6 +1,6 @@
 # if you wish to build release candidate number X, append the version string with ".rcX"
 {% set version = "2.5.1" %}
-{% set build = 11 %}
+{% set build = 12 %}
 
 # Use a higher build number for the CUDA variant, to ensure that it's
 # preferred by conda's solver, and it's preferentially

From e2c551d31a3630147a7b34dfd4eb5fa098665572 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 00:42:59 +1100
Subject: [PATCH 16/25] keep setting CUDA_TOOLKIT_ROOT_DIR

---
 recipe/build.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/recipe/build.sh b/recipe/build.sh
index 4ae0e644..981e90df 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -176,10 +176,8 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
     # all of them.
     export CUDAToolkit_BIN_DIR=${BUILD_PREFIX}/bin
     export CUDAToolkit_ROOT_DIR=${PREFIX}
-    if [[ "${target_platform}" != "${build_platform}" ]]; then
-        export CUDAToolkit_ROOT=${PREFIX}
-    fi
     # for CUPTI
+    export CUDA_TOOLKIT_ROOT_DIR=${PREFIX}
     export CUDAToolkit_ROOT=${PREFIX}
     case ${target_platform} in
         linux-64)

From 0de45db86d869932e66351da9ce57137d5f48bc5 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 08:57:28 +1100
Subject: [PATCH 17/25] patch find_package(CUDA) in tensorpipe submodule

---
 recipe/meta.yaml                              |  5 +++--
 ...ON-lib-from-CMake-install-directives.patch |  0
 ...1-switch-away-from-find_package-CUDA.patch | 22 +++++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)
 rename recipe/patches_submodules/{ => fbgemm}/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch (100%)
 create mode 100644 recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index e3b8b816..f5436b45 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -69,10 +69,11 @@ source:
     - patches/0016-point-include-paths-to-PREFIX-include.patch
     - patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
     - patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
-    - patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch               # [win]
+    - patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch                       # [win]
     - patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
     - patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
-    - patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch    # [win]
+    - patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch     # [win]
+    - patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
 
 build:
   number: {{ build }}
diff --git a/recipe/patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch b/recipe/patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
similarity index 100%
rename from recipe/patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
rename to recipe/patches_submodules/fbgemm/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
diff --git a/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch b/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
new file mode 100644
index 00000000..fe411d71
--- /dev/null
+++ b/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
@@ -0,0 +1,22 @@
+From 9a1de62dd1b3d816d6fb87c2041f4005ab5c683d Mon Sep 17 00:00:00 2001
+From: "H. Vetinari" <h.vetinari@gmx.com>
+Date: Sun, 2 Feb 2025 08:54:01 +1100
+Subject: [PATCH] switch away from find_package(CUDA)
+
+---
+ tensorpipe/CMakeLists.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/third_party/tensorpipe/tensorpipe/CMakeLists.txt b/third_party/tensorpipe/tensorpipe/CMakeLists.txt
+index efcffc2..1c3b2ca 100644
+--- a/third_party/tensorpipe/tensorpipe/CMakeLists.txt
++++ b/third_party/tensorpipe/tensorpipe/CMakeLists.txt
+@@ -234,7 +234,7 @@ if(TP_USE_CUDA)
+   # TP_INCLUDE_DIRS is list of include path to be used
+   set(TP_CUDA_INCLUDE_DIRS)
+ 
+-  find_package(CUDA REQUIRED)
++  find_package(CUDAToolkit REQUIRED)
+   list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})
+   list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
+ 

From 94c000bab6a062b41238bf47d59240094f059ca0 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 09:52:45 +1100
Subject: [PATCH 18/25] don't blow up logs with nvcc warnings

---
 recipe/bld.bat  | 2 ++
 recipe/build.sh | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/recipe/bld.bat b/recipe/bld.bat
index 87fe94e3..c058b1b5 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -91,6 +91,8 @@ if not "%cuda_compiler_version%" == "None" (
     set MAGMA_HOME=%LIBRARY_PREFIX%
     set "PATH=%CUDA_BIN_PATH%;%PATH%"
     set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include
+    @REM turn off very noisy nvcc warnings
+    set "CUDAFLAGS=-w"
 ) else (
     set USE_CUDA=0
     @REM MKLDNN is an Apache-2.0 licensed library for DNNs and is used
diff --git a/recipe/build.sh b/recipe/build.sh
index 981e90df..3a29224e 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -219,6 +219,8 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
     export USE_STATIC_CUDNN=0
     export MAGMA_HOME="${PREFIX}"
     export USE_MAGMA=1
+    # turn off noisy nvcc warnings
+    export CUDAFLAGS="-w"
 else
     if [[ "$target_platform" != *-64 ]]; then
       # Breakpad seems to not work on aarch64 or ppc64le

From e284ed07eaa857f89a4fe57560fbc0f4ebde06a9 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 10:16:39 +1100
Subject: [PATCH 19/25] reduce verbosity of pip install

---
 recipe/bld.bat  | 2 +-
 recipe/build.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/bld.bat b/recipe/bld.bat
index c058b1b5..ebb43756 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -160,7 +160,7 @@ if EXIST build (
     if %ERRORLEVEL% neq 0 exit 1
 )
 
-%PYTHON% -m pip %PIP_ACTION% . --no-build-isolation --no-deps -vvv --no-clean
+%PYTHON% -m pip %PIP_ACTION% . --no-build-isolation --no-deps --no-clean
 if %ERRORLEVEL% neq 0 exit 1
 
 @REM Here we split the build into two parts.
diff --git a/recipe/build.sh b/recipe/build.sh
index 3a29224e..324e1a00 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -253,7 +253,7 @@ case ${PKG_NAME} in
     cp build/CMakeCache.txt build/CMakeCache.txt.orig
     ;;
   pytorch)
-    $PREFIX/bin/python -m pip install . --no-deps --no-build-isolation -vvv --no-clean \
+    $PREFIX/bin/python -m pip install . --no-deps --no-build-isolation --no-clean \
         | sed "s,${CXX},\$\{CXX\},g" \
         | sed "s,${PREFIX},\$\{PREFIX\},g"
     # Keep this in ${PREFIX}/lib so that the library can be found by

From 1c23e13aa43843d84bda6474ba7f0dd60e91faec Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 10:52:46 +1100
Subject: [PATCH 20/25] skip a test that may fail on MKL

May yield
```
test/test_autograd.py:11417: in _test_reentrant_parent_error_on_cpu
    with self.assertRaisesRegex(Exception, "Simulate error"):
E   AssertionError: "Simulate error" does not match "grad can be implicitly created only for scalar outputs"
```
---
 recipe/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index f5436b45..1885530d 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -497,6 +497,7 @@ outputs:
         {% set skips = skips ~ " or (GPUTests and test_scatter_reduce2)" %}             # [linux and cuda_compiler_version != "None"]
         # MKL problems
         {% set skips = skips ~ " or (TestLinalgCPU and test_inverse_errors_large_cpu)" %}           # [linux and blas_impl == "mkl" and cuda_compiler_version != "None"]
+        {% set skips = skips ~ " or test_reentrant_parent_error_on_cpu_cuda" %}                     # [linux and blas_impl == "mkl" and cuda_compiler_version != "None"]
         # non-MKL problems
         {% set skips = skips ~ " or test_cross_entropy_loss_2d_out_of_bounds_class_index_cuda" %}   # [linux and blas_impl != "mkl" and cuda_compiler_version != "None"]
         {% set skips = skips ~ " or test_cublas_config_nondeterministic_alert_cuda " %}             # [linux and blas_impl != "mkl" and cuda_compiler_version != "None"]

From 138456c736799758ca263ab2448d5c7d5168fa21 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 11:35:41 +1100
Subject: [PATCH 21/25] fix patch for unique `mylib` in torchinductor tests

---
 ...e-in-test_mutable_custom_op_fixed_la.patch | 32 ++++++++++++++++---
 ...-find_package-CUDA-in-caffe2-CMake-m.patch |  2 +-
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch b/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
index 392ce265..17c54e33 100644
--- a/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
+++ b/recipe/patches/0020-make-library-name-in-test_mutable_custom_op_fixed_la.patch
@@ -1,4 +1,4 @@
-From 79ee5dcac30f2eba891af961f7649d15dfc6ce63 Mon Sep 17 00:00:00 2001
+From 39041f5a78068d2cf58d99f76938aee95a3c7bb5 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 13:23:14 +1100
 Subject: [PATCH 20/21] make library name in
@@ -6,11 +6,11 @@ Subject: [PATCH 20/21] make library name in
 
 Suggested-By: Daniel Petry <dpetry@anaconda.com>
 ---
- test/inductor/test_torchinductor.py | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
+ test/inductor/test_torchinductor.py | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
 
 diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py
-index 610f5d27332..28f85e228d2 100644
+index 610f5d27332..99e2169febb 100644
 --- a/test/inductor/test_torchinductor.py
 +++ b/test/inductor/test_torchinductor.py
 @@ -10628,7 +10628,8 @@ class CommonTemplate:
@@ -23,7 +23,19 @@ index 610f5d27332..28f85e228d2 100644
              mod = nn.Conv2d(3, 128, 1, stride=1, bias=False).to(device=GPU_TYPE)
              inp = torch.rand(2, 3, 128, 128, device=GPU_TYPE)
              expected_stride = mod(inp).clone().stride()
-@@ -10681,7 +10682,8 @@ class CommonTemplate:
+@@ -10664,8 +10665,9 @@ class CommonTemplate:
+             def fn(x):
+                 # Inductor changes the conv to be channels-last
+                 z = mod(x)
+-                output = torch.ops.mylib.bar(z, torch._dynamo.is_compiling())
+-                torch.ops.mylib.add_one(output)
++                mylib = importlib.import_module(f"torch.ops.{unique_lib_name}")
++                output = mylib.bar(z, torch._dynamo.is_compiling())
++                mylib.add_one(output)
+                 return output**2
+ 
+             with torch.no_grad():
+@@ -10681,7 +10683,8 @@ class CommonTemplate:
  
      @config.patch(implicit_fallbacks=True)
      def test_mutable_custom_op_fixed_layout(self):
@@ -33,3 +45,13 @@ index 610f5d27332..28f85e228d2 100644
              lib.define(
                  "copy_(Tensor(a!) dst, Tensor src) -> ()",
                  tags=torch.Tag.needs_fixed_stride_order,
+@@ -10697,7 +10700,8 @@ class CommonTemplate:
+ 
+             def f(x):
+                 full_default_3 = torch.full([3], 7.0, device="cpu")
+-                chunk_cat_default_1 = torch.ops.mylib.copy_.default(full_default_3, x)
++                mylib = importlib.import_module(f"torch.ops.{unique_lib_name}")
++                chunk_cat_default_1 = mylib.copy_.default(full_default_3, x)
+                 mul_out = torch.mul(full_default_3, full_default_3)
+                 return mul_out
+ 
diff --git a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
index b21209a5..a6f17c5d 100644
--- a/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
+++ b/recipe/patches/0021-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch
@@ -1,4 +1,4 @@
-From 3f643096358ed552ef5e97ad50d7a48d1c970e73 Mon Sep 17 00:00:00 2001
+From 1780879024ea952f8591aa175a9787f93e697368 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 30 Jan 2025 08:33:44 +1100
 Subject: [PATCH 21/21] avoid deprecated `find_package(CUDA)` in caffe2 CMake

From bdb9df5ef99b8071bb6e3f6e9213a892aea65054 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 15:39:39 +1100
Subject: [PATCH 22/25] reinstate logs for building libtorch on windows

---
 recipe/bld.bat | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/recipe/bld.bat b/recipe/bld.bat
index ebb43756..ffbf3b6e 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -29,6 +29,7 @@ if "%blas_impl%" == "generic" (
 
 if "%PKG_NAME%" == "pytorch" (
   set "PIP_ACTION=install"
+  set "PIP_VERBOSITY=-v"
   @REM We build libtorch for a specific python version.
   @REM This ensures its only build once. However, when that version changes
   @REM we need to make sure to update that here.
@@ -58,6 +59,7 @@ if "%PKG_NAME%" == "pytorch" (
   @REM For the main script we just build a wheel for so that the C++/CUDA
   @REM parts are built. Then they are reused in each python version.
   set "PIP_ACTION=wheel"
+  set "PIP_VERBOSITY=-vvv"
 )
 
 set "BUILD_CUSTOM_PROTOBUF=OFF"
@@ -160,7 +162,7 @@ if EXIST build (
     if %ERRORLEVEL% neq 0 exit 1
 )
 
-%PYTHON% -m pip %PIP_ACTION% . --no-build-isolation --no-deps --no-clean
+%PYTHON% -m pip %PIP_ACTION% . --no-build-isolation --no-deps %PIP_VERBOSITY% --no-clean
 if %ERRORLEVEL% neq 0 exit 1
 
 @REM Here we split the build into two parts.

From 44782b3f4b6745ccca97b04b8e2a636edc567379 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 2 Feb 2025 18:25:48 +1100
Subject: [PATCH 23/25] also switch off very noisy ptxas warnings

We get tens of thousands of log lines like
```
ptxas /tmp/tmpxft_00006937_00000000-8_SparseSemiStructuredOps.compute_86.ptx, line 8606; info    : Advisory: Modifier '.sp::ordered_metadata' should be used on instruction 'mma' instead of modifier '.sp' as it is expected to have substantially reduced performance on some future architectures
```
We cannot pass flags to ptxas directly; they have to be forwarded through nvcc via `--ptxas-options`, see
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#ptxas-options-options-xptxas
---
 recipe/bld.bat  | 2 +-
 recipe/build.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/bld.bat b/recipe/bld.bat
index ffbf3b6e..4089b425 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -94,7 +94,7 @@ if not "%cuda_compiler_version%" == "None" (
     set "PATH=%CUDA_BIN_PATH%;%PATH%"
     set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include
     @REM turn off very noisy nvcc warnings
-    set "CUDAFLAGS=-w"
+    set "CUDAFLAGS=-w --ptxas-options=-w"
 ) else (
     set USE_CUDA=0
     @REM MKLDNN is an Apache-2.0 licensed library for DNNs and is used
diff --git a/recipe/build.sh b/recipe/build.sh
index 324e1a00..3cc1ba76 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -220,7 +220,7 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
     export MAGMA_HOME="${PREFIX}"
     export USE_MAGMA=1
     # turn off noisy nvcc warnings
-    export CUDAFLAGS="-w"
+    export CUDAFLAGS="-w --ptxas-options=-w"
 else
     if [[ "$target_platform" != *-64 ]]; then
       # Breakpad seems to not work on aarch64 or ppc64le

From 5ee95f4bc7f954bef665ee3daecb4ae1c18903bf Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Mon, 3 Feb 2025 07:58:04 +1100
Subject: [PATCH 24/25] Reapply "skip test failures with CUDA due to non-unique
 temporaries"

This reverts commit b6808c2fff00fabd4b4d9c1a878491a7933a0660.
---
 recipe/meta.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 1885530d..e1101903 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -484,6 +484,9 @@ outputs:
         # may crash spuriously
         {% set skips = skips ~ " or (TestAutograd and test_profiler_seq_nr)" %}
         {% set skips = skips ~ " or (TestAutograd and test_profiler_propagation)" %}
+        # tests that fail due to resource clean-up issues (non-unique temporary libraries), see
+        # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/318#issuecomment-2620080859
+        {% set skips = skips ~ " or test_mutable_custom_op_fixed_layout" %}             # [cuda_compiler_version != "None"]
         # trivial accuracy problems
         {% set skips = skips ~ " or test_BCELoss_weights_no_reduce_cuda" %}             # [unix and cuda_compiler_version != "None"]
         {% set skips = skips ~ " or test_ctc_loss_cudnn_tensor_cuda " %}                # [unix and cuda_compiler_version != "None"]

From 162a7ebefa5582aa1671b82b455060c92deb1ec9 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Mon, 3 Feb 2025 08:04:57 +1100
Subject: [PATCH 25/25] Revert "reduce verbosity of pip install"

On unix, dropping `-vvv` from the pip invocation apparently busts the CMake cache and causes a full rebuild of pytorch (cause not yet confirmed).

This reverts commit e284ed07eaa857f89a4fe57560fbc0f4ebde06a9.
---
 recipe/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe/build.sh b/recipe/build.sh
index 3cc1ba76..a503b8f9 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -253,7 +253,7 @@ case ${PKG_NAME} in
     cp build/CMakeCache.txt build/CMakeCache.txt.orig
     ;;
   pytorch)
-    $PREFIX/bin/python -m pip install . --no-deps --no-build-isolation --no-clean \
+    $PREFIX/bin/python -m pip install . --no-deps --no-build-isolation -vvv --no-clean \
         | sed "s,${CXX},\$\{CXX\},g" \
         | sed "s,${PREFIX},\$\{PREFIX\},g"
     # Keep this in ${PREFIX}/lib so that the library can be found by