From 912c8cde66e88ea0af4c7bfd1f74de1971e6cb04 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Wed, 10 Sep 2025 11:58:19 +0100
Subject: [PATCH 01/32] resync with cf recipe - mainly patch updates

---
 recipe/meta.yaml                              | 16 ++--
 ...1-windows-FindMKL-add-library-suffix.patch | 12 +--
 .../0002-swap-openmp-search-precedence.patch  |  8 +-
 ...-of-python-3-and-error-without-numpy.patch | 10 ++-
 recipe/patches/0004-Help-find-numpy.patch     |  8 +-
 .../patches/0006-Update-sympy-version.patch   |  8 +-
 .../0007-Fix-duplicate-linker-script.patch    | 15 ----
 .../0007-continue-tests-on-failure.patch      |  8 +-
 .../patches/0008-add-missing-includes.patch   |  6 +-
 ...0009-use-prefix-include-for-inductor.patch |  8 +-
 ...E_DIR-relative-to-TORCH_INSTALL_PREF.patch | 10 +--
 ...ON-lib-from-CMake-install-TARGETS-di.patch | 74 +++++++++----------
 ...tils.cpp_extension.include_paths-use.patch | 25 ++++---
 ...oint-include-paths-to-PREFIX-include.patch | 10 ++-
 .../0015-point-lib-paths-to-PREFIX-lib.patch  |  8 +-
 15 files changed, 120 insertions(+), 106 deletions(-)
 delete mode 100644 recipe/patches/0007-Fix-duplicate-linker-script.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 4a34d38f..457ecd1f 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,8 +1,8 @@
-{% set version = "2.6.0" %}
+{% set version = "2.7.0" %}
 {% set sha256 = "3005690eb7b083c443a38c7657938af63902f524ad87a6c83f1aca38c77e3b57" %}
 # Set the RC number to build release candidates. Set to None otherwise
 {% set rc = None %}
-{% set build = 6 %}
+{% set build = 0 %}
 
 # Keep this in sync with the release
 {% set smoke_test_commit = "1eba9b3aa3c43f86f4a2c807ac8e12c4a7767340" %}
@@ -30,8 +30,14 @@ package:
 
 source:
 {% if rc != None %}
-  - git_url: https://github.com/pytorch/pytorch.git
-    git_rev: v{{ version }}-rc{{ rc }}
+  # - git_url: https://github.com/pytorch/pytorch.git
+  #   git_rev: v{{ version.replace(".rc", "-rc") }}
+  # we cannot apply patches to submodules when checking out with git_url, because
+  # then conda switches the patch-application to use git, which cannot construct
+  # a usable ancestor from outside the submodule; the only option then is to
+  # pull in the submodules separately.
+  - url: https://github.com/pytorch/pytorch/archive/refs/tags/v{{ version }}.tar.gz
+    sha256: 04ae0a8babdc9cb9dfc4f8746b2b8aa0f8ed0f9e92835cc4af0bcb01e3969e51
 {% else %}
   # The "pytorch-v" tarballs contain submodules; the "pytorch-" ones don't.
   - url: https://github.com/pytorch/pytorch/releases/download/v{{ version }}/pytorch-v{{ version }}.tar.gz
@@ -374,7 +380,7 @@ outputs:
         - nomkl                 # [blas_impl != "mkl"]
         - fsspec
         # Required to support torch.compile. This is tested in smoke_test.py, which is required to pass
-        - triton 3.1.0       # [(gpu_variant or "").startswith("cuda") and (linux and x86_64)]
+        - triton 3.3.0       # [(gpu_variant or "").startswith("cuda") and (linux and x86_64)]
         # avoid that people without GPUs needlessly download ~0.5-1GB
         # The CUDA version constraint is handled in cuda-version as a run_constrained.
         # However, that doesn't enforce that the package requires a GPU; that needs to be done here.
diff --git a/recipe/patches/0001-windows-FindMKL-add-library-suffix.patch b/recipe/patches/0001-windows-FindMKL-add-library-suffix.patch
index ce333117..6c54ee4b 100644
--- a/recipe/patches/0001-windows-FindMKL-add-library-suffix.patch
+++ b/recipe/patches/0001-windows-FindMKL-add-library-suffix.patch
@@ -8,9 +8,11 @@ This is required because our mdl-devel package contains libraries named like
 
 Index: pytorch/cmake/Modules/FindMKL.cmake
 ===================================================================
---- pytorch.orig/cmake/Modules/FindMKL.cmake	2024-11-19 16:11:16.666292111 -0600
-+++ pytorch/cmake/Modules/FindMKL.cmake	2024-11-19 16:14:15.719045239 -0600
-@@ -119,6 +119,9 @@
+diff --git cmake/Modules/FindMKL.cmake cmake/Modules/FindMKL.cmake
+index a9276de..d340c41 100644
+--- a/cmake/Modules/FindMKL.cmake
++++ b/cmake/Modules/FindMKL.cmake
+@@ -119,6 +119,9 @@ ELSE(WIN32)
      ELSE()
        SET(mklthreads "mkl_intel_thread")
        SET(mklrtls "iomp5" "guide")
@@ -20,7 +22,7 @@ Index: pytorch/cmake/Modules/FindMKL.cmake
      ENDIF()
      SET(mklifaces  "intel")
    ENDIF (CMAKE_COMPILER_IS_GNUCC)
-@@ -268,7 +271,7 @@
+@@ -268,7 +271,7 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES OPENMP_TYPE OPENMP_LIBRARY _name _list _flag
            ENDIF(OPENMP_FOUND)
          ELSEIF(${_library} MATCHES "iomp")
            SET(_openmp_type "Intel")
@@ -29,7 +31,7 @@ Index: pytorch/cmake/Modules/FindMKL.cmake
            SET(_openmp_library "${${_prefix}_${_library}_LIBRARY}")
          ELSE()
            MESSAGE(FATAL_ERROR "Unknown OpenMP flavor: ${_library}")
-@@ -421,23 +424,23 @@
+@@ -421,23 +424,23 @@ IF (MKL_LIBRARIES)
    FOREACH(mkl64 ${mkl64s} "_core" "")
      FOREACH(mkls ${mklseq} "")
        IF (NOT MKL_LAPACK_LIBRARIES)
diff --git a/recipe/patches/0002-swap-openmp-search-precedence.patch b/recipe/patches/0002-swap-openmp-search-precedence.patch
index 7325ca22..0dbe22bc 100644
--- a/recipe/patches/0002-swap-openmp-search-precedence.patch
+++ b/recipe/patches/0002-swap-openmp-search-precedence.patch
@@ -1,8 +1,10 @@
 Index: pytorch/cmake/Modules/FindMKL.cmake
 ===================================================================
---- pytorch.orig/cmake/Modules/FindMKL.cmake	2024-11-19 16:17:57.874552303 -0600
-+++ pytorch/cmake/Modules/FindMKL.cmake	2024-11-19 16:18:29.915971379 -0600
-@@ -108,8 +108,8 @@
+diff --git cmake/Modules/FindMKL.cmake cmake/Modules/FindMKL.cmake
+index d340c41..3eab320 100644
+--- a/cmake/Modules/FindMKL.cmake
++++ b/cmake/Modules/FindMKL.cmake
+@@ -108,8 +108,8 @@ ELSE(WIN32)
        SET(mklthreads "mkl_tbb_thread")
        SET(mklrtls "tbb")
      ELSE()
diff --git a/recipe/patches/0003-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0003-Force-usage-of-python-3-and-error-without-numpy.patch
index b5e09dfa..6141dd63 100644
--- a/recipe/patches/0003-Force-usage-of-python-3-and-error-without-numpy.patch
+++ b/recipe/patches/0003-Force-usage-of-python-3-and-error-without-numpy.patch
@@ -9,9 +9,11 @@ Subject: [PATCH] Force usage of python 3 and error without numpy
 
 Index: pytorch/cmake/Dependencies.cmake
 ===================================================================
---- pytorch.orig/cmake/Dependencies.cmake	2024-11-12 20:18:55.008695666 -0600
-+++ pytorch/cmake/Dependencies.cmake	2024-11-19 16:18:44.359469010 -0600
-@@ -861,9 +861,9 @@
+diff --git cmake/Dependencies.cmake cmake/Dependencies.cmake
+index 1813f44..36b507f 100644
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -848,9 +848,9 @@ if(BUILD_PYTHON)
    if(USE_NUMPY)
      list(APPEND PYTHON_COMPONENTS NumPy)
    endif()
@@ -23,7 +25,7 @@ Index: pytorch/cmake/Dependencies.cmake
  endif()
  
  if(NOT Python_Interpreter_FOUND)
-@@ -880,7 +880,7 @@
+@@ -867,7 +867,7 @@ if(BUILD_PYTHON)
    if(Python_Development.Module_FOUND)
      if(USE_NUMPY)
        if(NOT Python_NumPy_FOUND)
diff --git a/recipe/patches/0004-Help-find-numpy.patch b/recipe/patches/0004-Help-find-numpy.patch
index de2a147d..b25296e0 100644
--- a/recipe/patches/0004-Help-find-numpy.patch
+++ b/recipe/patches/0004-Help-find-numpy.patch
@@ -9,9 +9,11 @@ Subject: [PATCH] Help find numpy
 
 Index: pytorch/tools/setup_helpers/cmake.py
 ===================================================================
---- pytorch.orig/tools/setup_helpers/cmake.py	2024-11-12 20:18:55.448001614 -0600
-+++ pytorch/tools/setup_helpers/cmake.py	2024-11-19 16:18:54.574972542 -0600
-@@ -305,9 +305,15 @@
+diff --git tools/setup_helpers/cmake.py tools/setup_helpers/cmake.py
+index 84e4dad..8ce7272 100644
+--- a/tools/setup_helpers/cmake.py
++++ b/tools/setup_helpers/cmake.py
+@@ -306,9 +306,15 @@ class CMake:
              sys.exit(1)
          build_options.update(cmake__options)
  
diff --git a/recipe/patches/0006-Update-sympy-version.patch b/recipe/patches/0006-Update-sympy-version.patch
index c97152fd..852a5594 100644
--- a/recipe/patches/0006-Update-sympy-version.patch
+++ b/recipe/patches/0006-Update-sympy-version.patch
@@ -1,6 +1,8 @@
---- setup.py.orig	2025-05-09 09:31:46.585275237 +0100
-+++ setup.py	2025-05-15 09:20:32.995744486 +0100
-@@ -1099,7 +1099,7 @@
+diff --git setup.py setup.py
+index a6a6db7..083919f 100644
+--- a/setup.py
++++ b/setup.py
+@@ -1099,7 +1099,7 @@ def main():
          "filelock",
          "typing-extensions>=4.10.0",
          'setuptools ; python_version >= "3.12"',
diff --git a/recipe/patches/0007-Fix-duplicate-linker-script.patch b/recipe/patches/0007-Fix-duplicate-linker-script.patch
deleted file mode 100644
index 8aeafbc8..00000000
--- a/recipe/patches/0007-Fix-duplicate-linker-script.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/setup.py b/setup.py
-index 2b0cfa99d71..080dc0aa8e7 100644
---- a/setup.py
-+++ b/setup.py
-@@ -1170,7 +1170,9 @@ def main():
-             filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld"
-         )
-         linker_script_path = os.path.abspath("cmake/linker_script.ld")
--        os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}"
-+        ldflags = os.getenv("LDFLAGS", "")
-+        if f"-T{linker_script_path}" not in ldflags:
-+            os.environ["LDFLAGS"] = ldflags + f" -T{linker_script_path}"
-         os.environ["CFLAGS"] = (
-             os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
-         )
diff --git a/recipe/patches/0007-continue-tests-on-failure.patch b/recipe/patches/0007-continue-tests-on-failure.patch
index 882b63f7..161fc8ce 100644
--- a/recipe/patches/0007-continue-tests-on-failure.patch
+++ b/recipe/patches/0007-continue-tests-on-failure.patch
@@ -1,8 +1,10 @@
 Index: pytorch/test/run_test.py
 ===================================================================
---- pytorch.orig/test/run_test.py	2024-11-19 16:09:45.970287774 -0600
-+++ pytorch/test/run_test.py	2024-11-19 16:19:29.415077042 -0600
-@@ -1044,7 +1044,7 @@
+diff --git test/run_test.py test/run_test.py
+index a508d8d..02d9af9 100755
+--- a/test/run_test.py
++++ b/test/run_test.py
+@@ -1384,7 +1384,7 @@ def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
      else:
          # When under the normal mode, retry a failed test 2 more times. -x means stop at the first
          # failure
diff --git a/recipe/patches/0008-add-missing-includes.patch b/recipe/patches/0008-add-missing-includes.patch
index d476c65b..85bba937 100644
--- a/recipe/patches/0008-add-missing-includes.patch
+++ b/recipe/patches/0008-add-missing-includes.patch
@@ -4,8 +4,10 @@ It won't compile for osx-arm64 metal; unsure why it can on linux-64, but in any
 case, they should be present.
 Index: pytorch/torch/csrc/distributed/c10d/control_plane/Handlers.hpp
 ===================================================================
---- pytorch.orig/torch/csrc/distributed/c10d/control_plane/Handlers.hpp	2024-11-12 20:18:55.762951714 -0600
-+++ pytorch/torch/csrc/distributed/c10d/control_plane/Handlers.hpp	2024-11-19 16:23:52.134859846 -0600
+diff --git torch/csrc/distributed/c10d/control_plane/Handlers.hpp torch/csrc/distributed/c10d/control_plane/Handlers.hpp
+index 70333a3..aa9619e 100644
+--- a/torch/csrc/distributed/c10d/control_plane/Handlers.hpp
++++ b/torch/csrc/distributed/c10d/control_plane/Handlers.hpp
 @@ -4,6 +4,8 @@
  #include <map>
  #include <string>
diff --git a/recipe/patches/0009-use-prefix-include-for-inductor.patch b/recipe/patches/0009-use-prefix-include-for-inductor.patch
index 70f94172..3657ef73 100644
--- a/recipe/patches/0009-use-prefix-include-for-inductor.patch
+++ b/recipe/patches/0009-use-prefix-include-for-inductor.patch
@@ -4,9 +4,11 @@ author: dpetry@anaconda.com
 
 Index: pytorch/torch/_inductor/cpp_builder.py
 ===================================================================
---- pytorch.orig/torch/_inductor/cpp_builder.py	2024-12-16 15:16:47.074821258 -0600
-+++ pytorch/torch/_inductor/cpp_builder.py	2024-12-16 15:17:33.922130106 -0600
-@@ -1055,6 +1055,7 @@
+diff --git torch/_inductor/cpp_builder.py torch/_inductor/cpp_builder.py
+index 92cf88d..5275dfa 100644
+--- a/torch/_inductor/cpp_builder.py
++++ b/torch/_inductor/cpp_builder.py
+@@ -1071,6 +1071,7 @@ def get_cpp_torch_options(
          + python_include_dirs
          + torch_include_dirs
          + omp_include_dir_paths
diff --git a/recipe/patches/0010-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch b/recipe/patches/0010-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
index cc7e33dd..7a8da185 100644
--- a/recipe/patches/0010-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
+++ b/recipe/patches/0010-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
@@ -10,11 +10,11 @@ https://github.com/pytorch/pytorch/blob/v2.5.1/cmake/TorchConfig.cmake.in#L47
  aten/src/ATen/CMakeLists.txt | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
-diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 6d9152a4d07..aa4dd7b05cc 100644
---- a/aten/src/ATen/CMakeLists.txt
-+++ b/aten/src/ATen/CMakeLists.txt
-@@ -563,7 +563,7 @@ if(USE_ROCM)
+diff --git aten/src/ATen/CMakeLists.txt aten/src/ATen/CMakeLists.txt
+index f0868ea..c20ea20 100644
+--- a/aten/src/ATen/aten/src/ATen/CMakeLists.txt
++++ b/aten/src/ATen/aten/src/ATen/CMakeLists.txt
+@@ -604,7 +604,7 @@ if(USE_ROCM)
    # list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
  endif()
  
diff --git a/recipe/patches/0011-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch b/recipe/patches/0011-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
index 9229befc..d182fe52 100644
--- a/recipe/patches/0011-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
+++ b/recipe/patches/0011-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch
@@ -15,11 +15,11 @@ Suggested-By: Silvio Traversaro <silvio@traversaro.it>
  torch/lib/libshm_windows/CMakeLists.txt |  2 +-
  7 files changed, 15 insertions(+), 15 deletions(-)
 
-diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
-index 80e172497d5..d7f8987020d 100644
---- a/c10/CMakeLists.txt
-+++ b/c10/CMakeLists.txt
-@@ -162,7 +162,7 @@ if(NOT BUILD_LIBTORCHLESS)
+diff --git c10/CMakeLists.txt c10/CMakeLists.txt
+index 34577ca..8f00b3d 100644
+--- a/c10/c10/CMakeLists.txt
++++ b/c10/c10/CMakeLists.txt
+@@ -163,7 +163,7 @@ if(NOT BUILD_LIBTORCHLESS)
    # Note: for now, we will put all export path into one single Caffe2Targets group
    # to deal with the cmake deployment need. Inside the Caffe2Targets set, the
    # individual libraries like libc10.so and libcaffe2.so are still self-contained.
@@ -28,10 +28,10 @@ index 80e172497d5..d7f8987020d 100644
  endif()
  
  install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
-diff --git a/c10/cuda/CMakeLists.txt b/c10/cuda/CMakeLists.txt
-index 3327dab4779..9336c9e8f77 100644
---- a/c10/cuda/CMakeLists.txt
-+++ b/c10/cuda/CMakeLists.txt
+diff --git c10/cuda/CMakeLists.txt c10/cuda/CMakeLists.txt
+index 3327dab..9336c9e 100644
+--- a/c10/c10/cuda/CMakeLists.txt
++++ b/c10/c10/cuda/CMakeLists.txt
 @@ -82,7 +82,7 @@ if(NOT BUILD_LIBTORCHLESS)
  # Note: for now, we will put all export path into one single Caffe2Targets group
  # to deal with the cmake deployment need. Inside the Caffe2Targets set, the
@@ -41,10 +41,10 @@ index 3327dab4779..9336c9e8f77 100644
  
  endif()
  
-diff --git a/c10/hip/CMakeLists.txt b/c10/hip/CMakeLists.txt
-index f153030e793..514c6d29266 100644
---- a/c10/hip/CMakeLists.txt
-+++ b/c10/hip/CMakeLists.txt
+diff --git c10/hip/CMakeLists.txt c10/hip/CMakeLists.txt
+index f153030..514c6d2 100644
+--- a/c10/c10/hip/CMakeLists.txt
++++ b/c10/c10/hip/CMakeLists.txt
 @@ -55,7 +55,7 @@ if(NOT BUILD_LIBTORCHLESS)
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
        $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
@@ -54,10 +54,10 @@ index f153030e793..514c6d29266 100644
    set(C10_HIP_LIB c10_hip)
  endif()
  
-diff --git a/c10/xpu/CMakeLists.txt b/c10/xpu/CMakeLists.txt
-index 01f77d61713..437ade657f9 100644
---- a/c10/xpu/CMakeLists.txt
-+++ b/c10/xpu/CMakeLists.txt
+diff --git c10/xpu/CMakeLists.txt c10/xpu/CMakeLists.txt
+index 01f77d6..437ade6 100644
+--- a/c10/c10/xpu/CMakeLists.txt
++++ b/c10/c10/xpu/CMakeLists.txt
 @@ -45,7 +45,7 @@ if(NOT BUILD_LIBTORCHLESS)
        $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
        $<INSTALL_INTERFACE:include>
@@ -67,11 +67,11 @@ index 01f77d61713..437ade657f9 100644
    set(C10_XPU_LIB c10_xpu)
    add_subdirectory(test)
  endif()
-diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 9be7f3732f3..b51c7cc637b 100644
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -549,7 +549,7 @@ if(USE_CUDA)
+diff --git caffe2/CMakeLists.txt caffe2/CMakeLists.txt
+index 33199c7..fc858f3 100644
+--- a/c10/caffe2/CMakeLists.txt
++++ b/c10/caffe2/CMakeLists.txt
+@@ -557,7 +557,7 @@ if(USE_CUDA)
    endif()
  
    target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS})
@@ -80,8 +80,8 @@ index 9be7f3732f3..b51c7cc637b 100644
    if(USE_NCCL)
      list(APPEND Caffe2_GPU_SRCS
        ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
-@@ -609,7 +609,7 @@ if(USE_ROCM)
-   target_link_libraries(caffe2_nvrtc ${PYTORCH_HIP_LIBRARIES} ${ROCM_HIPRTC_LIB})
+@@ -628,7 +628,7 @@ if(USE_ROCM)
+   target_link_libraries(caffe2_nvrtc hip::amdhip64 hiprtc::hiprtc)
    target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR})
    target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__)
 -  install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
@@ -89,7 +89,7 @@ index 9be7f3732f3..b51c7cc637b 100644
  endif()
  
  if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER)
-@@ -995,7 +995,7 @@ elseif(USE_CUDA)
+@@ -1031,7 +1031,7 @@ elseif(USE_CUDA)
            CUDA::culibos ${CMAKE_DL_LIBS})
      endif()
      set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG")
@@ -98,7 +98,7 @@ index 9be7f3732f3..b51c7cc637b 100644
    endif()
  
    if(USE_PRECOMPILED_HEADERS)
-@@ -1467,17 +1467,17 @@ endif()
+@@ -1517,17 +1517,17 @@ endif()
  
  caffe2_interface_library(torch torch_library)
  
@@ -121,7 +121,7 @@ index 9be7f3732f3..b51c7cc637b 100644
  
  target_link_libraries(torch PUBLIC torch_cpu_library)
  
-@@ -1616,7 +1616,7 @@ if(BUILD_SHARED_LIBS)
+@@ -1666,7 +1666,7 @@ if(BUILD_SHARED_LIBS)
        target_link_libraries(torch_global_deps torch::nvtoolsext)
      endif()
    endif()
@@ -130,11 +130,11 @@ index 9be7f3732f3..b51c7cc637b 100644
  endif()
  
  # ---[ Caffe2 HIP sources.
-diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
-index c74b45431c9..80fb5e7734e 100644
---- a/torch/CMakeLists.txt
-+++ b/torch/CMakeLists.txt
-@@ -447,7 +447,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "")
+diff --git torch/CMakeLists.txt torch/CMakeLists.txt
+index b123023..650319c 100644
+--- a/c10/torch/CMakeLists.txt
++++ b/c10/torch/CMakeLists.txt
+@@ -458,7 +458,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "")
      set_target_properties(torch_python PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS})
  endif()
  
@@ -143,10 +143,10 @@ index c74b45431c9..80fb5e7734e 100644
  
  # Generate torch/version.py from the appropriate CMake cache variables.
  if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
-diff --git a/torch/lib/libshm_windows/CMakeLists.txt b/torch/lib/libshm_windows/CMakeLists.txt
-index df2a1064938..5fa15e6be31 100644
---- a/torch/lib/libshm_windows/CMakeLists.txt
-+++ b/torch/lib/libshm_windows/CMakeLists.txt
+diff --git torch/lib/libshm_windows/CMakeLists.txt torch/lib/libshm_windows/CMakeLists.txt
+index df2a106..5fa15e6 100644
+--- a/c10/torch/lib/libshm_windows/CMakeLists.txt
++++ b/c10/torch/lib/libshm_windows/CMakeLists.txt
 @@ -19,7 +19,7 @@ target_include_directories(shm PRIVATE
  target_link_libraries(shm torch c10)
  
@@ -155,4 +155,4 @@ index df2a1064938..5fa15e6be31 100644
 +install(TARGETS shm)
  install(FILES libshm.h DESTINATION "include")
  
- if(MSVC AND BUILD_SHARED_LIBS)
\ No newline at end of file
+ if(MSVC AND BUILD_SHARED_LIBS)
diff --git a/recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch b/recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch
index f9615f0e..52db6cc2 100644
--- a/recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch
+++ b/recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch
@@ -12,15 +12,16 @@ The /TH headers have not existed since pytorch 1.11
 
 Index: pytorch/torch/_inductor/cpp_builder.py
 ===================================================================
---- pytorch.orig/torch/_inductor/cpp_builder.py	2025-03-06 16:00:42.392955757 -0600
-+++ pytorch/torch/_inductor/cpp_builder.py	2025-03-06 16:00:42.456841659 -0600
-@@ -743,16 +743,9 @@
+diff --git torch/_inductor/cpp_builder.py torch/_inductor/cpp_builder.py
+index 5275dfa..5b455ff 100644
+--- a/torch/_inductor/cpp_builder.py
++++ b/torch/_inductor/cpp_builder.py
+@@ -764,16 +764,9 @@ def _get_build_args_of_chosen_isa(vec_isa: VecISA) -> Tuple[List[str], List[str]
  def _get_torch_related_args(
      include_pytorch: bool, aot_mode: bool
  ) -> Tuple[List[str], List[str], List[str]]:
 -    from torch.utils.cpp_extension import _TORCH_PATH, TORCH_LIB_PATH
-+    from torch.utils.cpp_extension import include_paths, TORCH_LIB_PATH
- 
+-
 -    include_dirs = [
 -        os.path.join(_TORCH_PATH, "include"),
 -        os.path.join(_TORCH_PATH, "include", "torch", "csrc", "api", "include"),
@@ -29,15 +30,17 @@ Index: pytorch/torch/_inductor/cpp_builder.py
 -        os.path.join(_TORCH_PATH, "include", "TH"),
 -        os.path.join(_TORCH_PATH, "include", "THC"),
 -    ]
++    from torch.utils.cpp_extension import include_paths, TORCH_LIB_PATH
++
 +    include_dirs = include_paths()
      libraries_dirs = [TORCH_LIB_PATH]
      libraries = []
      if sys.platform != "darwin" and not config.is_fbcode():
-Index: pytorch/torch/utils/cpp_extension.py
-===================================================================
---- pytorch.orig/torch/utils/cpp_extension.py	2024-12-16 15:07:01.482833535 -0600
-+++ pytorch/torch/utils/cpp_extension.py	2025-03-06 16:01:11.398235000 -0600
-@@ -1159,10 +1159,6 @@
+diff --git torch/utils/cpp_extension.py torch/utils/cpp_extension.py
+index b4a70dc..23e2499 100644
+--- a/torch/utils/cpp_extension.py
++++ b/torch/utils/cpp_extension.py
+@@ -1212,10 +1212,6 @@ def include_paths(device_type: str = "cpu") -> List[str]:
          lib_include,
          # Remove this once torch/torch.h is officially no longer supported for C++ extensions.
          os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'),
@@ -46,5 +49,5 @@ Index: pytorch/torch/utils/cpp_extension.py
 -        os.path.join(lib_include, 'TH'),
 -        os.path.join(lib_include, 'THC')
      ]
-     if cuda and IS_HIP_EXTENSION:
+     if device_type == "cuda" and IS_HIP_EXTENSION:
          paths.append(os.path.join(lib_include, 'THH'))
diff --git a/recipe/patches/0014-point-include-paths-to-PREFIX-include.patch b/recipe/patches/0014-point-include-paths-to-PREFIX-include.patch
index d5961fbc..3c2d6130 100644
--- a/recipe/patches/0014-point-include-paths-to-PREFIX-include.patch
+++ b/recipe/patches/0014-point-include-paths-to-PREFIX-include.patch
@@ -9,9 +9,11 @@ Subject: [PATCH 10/15] point include paths to $PREFIX/include
 
 Index: pytorch/torch/utils/cpp_extension.py
 ===================================================================
---- pytorch.orig/torch/utils/cpp_extension.py	2025-03-06 16:00:42.457678160 -0600
-+++ pytorch/torch/utils/cpp_extension.py	2025-03-06 16:00:42.489486590 -0600
-@@ -1155,10 +1155,28 @@
+diff --git torch/utils/cpp_extension.py torch/utils/cpp_extension.py
+index 23e2499..a8caba3 100644
+--- a/torch/utils/cpp_extension.py
++++ b/torch/utils/cpp_extension.py
+@@ -1208,10 +1208,28 @@ def include_paths(device_type: str = "cpu") -> List[str]:
          A list of include path strings.
      """
      lib_include = os.path.join(_TORCH_PATH, 'include')
@@ -38,5 +40,5 @@ Index: pytorch/torch/utils/cpp_extension.py
 +        # $PREFIX/include), as some torch-internal headers are still in this directory
 +        os.path.join(_TORCH_PATH, 'include'),
      ]
-     if cuda and IS_HIP_EXTENSION:
+     if device_type == "cuda" and IS_HIP_EXTENSION:
          paths.append(os.path.join(lib_include, 'THH'))
diff --git a/recipe/patches/0015-point-lib-paths-to-PREFIX-lib.patch b/recipe/patches/0015-point-lib-paths-to-PREFIX-lib.patch
index 63003b6f..21322ba0 100644
--- a/recipe/patches/0015-point-lib-paths-to-PREFIX-lib.patch
+++ b/recipe/patches/0015-point-lib-paths-to-PREFIX-lib.patch
@@ -8,9 +8,11 @@ that's where are sos/dlls are.
 
 Index: pytorch/torch/utils/cpp_extension.py
 ===================================================================
---- pytorch.orig/torch/utils/cpp_extension.py	2025-03-06 16:01:26.600202667 -0600
-+++ pytorch/torch/utils/cpp_extension.py	2025-03-06 16:02:39.676025365 -0600
-@@ -38,7 +38,18 @@
+diff --git torch/utils/cpp_extension.py torch/utils/cpp_extension.py
+index a8caba3..b2d28ab 100644
+--- a/torch/utils/cpp_extension.py
++++ b/torch/utils/cpp_extension.py
+@@ -39,7 +39,18 @@ SHARED_FLAG = '/DLL' if IS_WINDOWS else '-shared'
  
  _HERE = os.path.abspath(__file__)
  _TORCH_PATH = os.path.dirname(os.path.dirname(_HERE))

From 6d18b42bd551d62604a003d71c9f9ab5c17a7c25 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Wed, 10 Sep 2025 12:22:22 +0100
Subject: [PATCH 02/32] remove old patches and repair zip keys

---
 recipe/conda_build_config.yaml                |  4 +-
 recipe/meta.yaml                              |  6 +--
 .../patches/0006-Update-sympy-version.patch   | 13 -----
 ...tils.cpp_extension.include_paths-use.patch | 53 -------------------
 4 files changed, 3 insertions(+), 73 deletions(-)
 delete mode 100644 recipe/patches/0006-Update-sympy-version.patch
 delete mode 100644 recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch

diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index a35919a2..88667118 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -12,10 +12,10 @@ cxx_compiler_version:    # [osx]
 # However there's currently a conda-forge bug that prevents this: https://github.com/conda/conda-build/issues/5048
 MACOSX_SDK_VERSION:          # [(osx and arm64)]
   - 11.1                     # [(osx and arm64)]
-  #- 13.3                     # [(osx and arm64)]
+  - 13.3                     # [(osx and arm64)]
 CONDA_BUILD_SYSROOT:         # [(osx and arm64)]
   - /Library/Developer/CommandLineTools/SDKs/MacOSX11.1.sdk  # [(osx and arm64)]
-  #- /Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk  # [(osx and arm64)]
+  - /Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk  # [(osx and arm64)]
 zip_keys:                    # [(osx and arm64)]
   - gpu_variant              # [(osx and arm64)]
   - MACOSX_SDK_VERSION       # [(osx and arm64)]
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 457ecd1f..095940d3 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,5 +1,5 @@
 {% set version = "2.7.0" %}
-{% set sha256 = "3005690eb7b083c443a38c7657938af63902f524ad87a6c83f1aca38c77e3b57" %}
+{% set sha256 = "ecca266fa2de4235a9fd5a18a33299a9de55ab3babb87f8c297c1c9ab8d436bd" %}
 # Set the RC number to build release candidates. Set to None otherwise
 {% set rc = None %}
 {% set build = 0 %}
@@ -48,16 +48,12 @@ source:
       - patches/0003-Force-usage-of-python-3-and-error-without-numpy.patch
       # https://github.com/pytorch/pytorch/pull/137084
       - patches/0004-Help-find-numpy.patch
-      # sympy 1.13.2 was reported to result in test failures on Windows and mac - skipping to 1.13.3 for stability
-      # https://github.com/pytorch/pytorch/pull/133235
-      - patches/0006-Update-sympy-version.patch
       - patches/0007-continue-tests-on-failure.patch
       - patches/0008-add-missing-includes.patch
       - patches/0009-use-prefix-include-for-inductor.patch
       - patches/0010-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch 
       - patches/0011-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch             # [win]
       - patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch  # [win]
-      - patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch
       - patches/0014-point-include-paths-to-PREFIX-include.patch
       - patches/0015-point-lib-paths-to-PREFIX-lib.patch
 {% endif %}
diff --git a/recipe/patches/0006-Update-sympy-version.patch b/recipe/patches/0006-Update-sympy-version.patch
deleted file mode 100644
index 852a5594..00000000
--- a/recipe/patches/0006-Update-sympy-version.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git setup.py setup.py
-index a6a6db7..083919f 100644
---- a/setup.py
-+++ b/setup.py
-@@ -1099,7 +1099,7 @@ def main():
-         "filelock",
-         "typing-extensions>=4.10.0",
-         'setuptools ; python_version >= "3.12"',
--        'sympy==1.13.1 ; python_version >= "3.9"',
-+        'sympy>=1.13.3',
-         "networkx",
-         "jinja2",
-         "fsspec",
diff --git a/recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch b/recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch
deleted file mode 100644
index 52db6cc2..00000000
--- a/recipe/patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 12a4473ae7a47da2a30121f329a2c3c8f3f456c5 Mon Sep 17 00:00:00 2001
-From: "H. Vetinari" <h.vetinari@gmx.com>
-Date: Thu, 23 Jan 2025 22:46:58 +1100
-Subject: [PATCH 09/15] simplify torch.utils.cpp_extension.include_paths; use
- it in cpp_builder
-
-The /TH headers have not existed since pytorch 1.11
----
- torch/_inductor/cpp_builder.py | 13 +++----------
- torch/utils/cpp_extension.py   |  4 ----
- 2 files changed, 3 insertions(+), 14 deletions(-)
-
-Index: pytorch/torch/_inductor/cpp_builder.py
-===================================================================
-diff --git torch/_inductor/cpp_builder.py torch/_inductor/cpp_builder.py
-index 5275dfa..5b455ff 100644
---- a/torch/_inductor/cpp_builder.py
-+++ b/torch/_inductor/cpp_builder.py
-@@ -764,16 +764,9 @@ def _get_build_args_of_chosen_isa(vec_isa: VecISA) -> Tuple[List[str], List[str]
- def _get_torch_related_args(
-     include_pytorch: bool, aot_mode: bool
- ) -> Tuple[List[str], List[str], List[str]]:
--    from torch.utils.cpp_extension import _TORCH_PATH, TORCH_LIB_PATH
--
--    include_dirs = [
--        os.path.join(_TORCH_PATH, "include"),
--        os.path.join(_TORCH_PATH, "include", "torch", "csrc", "api", "include"),
--        # Some internal (old) Torch headers don't properly prefix their includes,
--        # so we need to pass -Itorch/lib/include/TH as well.
--        os.path.join(_TORCH_PATH, "include", "TH"),
--        os.path.join(_TORCH_PATH, "include", "THC"),
--    ]
-+    from torch.utils.cpp_extension import include_paths, TORCH_LIB_PATH
-+
-+    include_dirs = include_paths()
-     libraries_dirs = [TORCH_LIB_PATH]
-     libraries = []
-     if sys.platform != "darwin" and not config.is_fbcode():
-diff --git torch/utils/cpp_extension.py torch/utils/cpp_extension.py
-index b4a70dc..23e2499 100644
---- a/torch/utils/cpp_extension.py
-+++ b/torch/utils/cpp_extension.py
-@@ -1212,10 +1212,6 @@ def include_paths(device_type: str = "cpu") -> List[str]:
-         lib_include,
-         # Remove this once torch/torch.h is officially no longer supported for C++ extensions.
-         os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'),
--        # Some internal (old) Torch headers don't properly prefix their includes,
--        # so we need to pass -Itorch/lib/include/TH as well.
--        os.path.join(lib_include, 'TH'),
--        os.path.join(lib_include, 'THC')
-     ]
-     if device_type == "cuda" and IS_HIP_EXTENSION:
-         paths.append(os.path.join(lib_include, 'THH'))

From 6cf6aedc1ab1f23961d319c3685fe979eebae5d2 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 11 Sep 2025 07:27:37 +0100
Subject: [PATCH 03/32] add nccl patch

---
 recipe/meta.yaml                              |  1 +
 ...-check-out-nccl-when-not-building-it.patch | 45 +++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 095940d3..09beb463 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -56,6 +56,7 @@ source:
       - patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch  # [win]
       - patches/0014-point-include-paths-to-PREFIX-include.patch
       - patches/0015-point-lib-paths-to-PREFIX-lib.patch
+      - patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
 {% endif %}
   - url: https://raw.githubusercontent.com/pytorch/pytorch/{{ smoke_test_commit }}/.ci/pytorch/smoke_test/smoke_test.py
     folder: smoke_test
diff --git a/recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch b/recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
new file mode 100644
index 00000000..2e0527c2
--- /dev/null
+++ b/recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
@@ -0,0 +1,45 @@
+From 89e36459795a0d680ce902e53983419115c646dd Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
+Date: Wed, 30 Apr 2025 17:11:56 +0200
+Subject: [PATCH] Do not check out nccl when not building it
+
+Add additional conditions to `build_pytorch_libs.py` to avoid fetching
+NCCL when `USE_CUDA` or `USE_NCCL` are disabled. While at it, adjust
+the existing condition for `USE_SYSTEM_NCCL` to use the utility
+function.
+---
+ tools/build_pytorch_libs.py | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py
+index 5dd5a2219..2b8b868ea 100644
+--- a/tools/build_pytorch_libs.py
++++ b/tools/build_pytorch_libs.py
+@@ -7,7 +7,12 @@ from glob import glob
+ from pathlib import Path
+ 
+ from .setup_helpers.cmake import CMake, USE_NINJA
+-from .setup_helpers.env import check_negative_env_flag, IS_64BIT, IS_WINDOWS
++from .setup_helpers.env import (
++    check_env_flag,
++    check_negative_env_flag,
++    IS_64BIT,
++    IS_WINDOWS,
++)
+ 
+ 
+ repo_root = Path(__file__).absolute().parent.parent
+@@ -119,7 +124,12 @@ def build_pytorch(
+     cmake: CMake,
+ ) -> None:
+     my_env = _create_build_env()
+-    checkout_nccl()
++    if (
++        not check_negative_env_flag("USE_CUDA")
++        and not check_negative_env_flag("USE_NCCL")
++        and not check_env_flag("USE_SYSTEM_NCCL")
++    ):
++        checkout_nccl()
+     build_test = not check_negative_env_flag("BUILD_TEST")
+     cmake.generate(
+         version, cmake_python_library, build_python, build_test, my_env, rerun_cmake
\ No newline at end of file

From 1cb8a8185606ca61f762e583ee19f39d17a702e1 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Wed, 17 Sep 2025 14:09:12 +0100
Subject: [PATCH 04/32] ensure test is explictly not being ran. They're listed
 as a known issue in this release and can break on CI

---
 recipe/meta.yaml                              |  6 ++++-
 .../0007-continue-tests-on-failure.patch      | 23 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 09beb463..22b9bc03 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -48,6 +48,7 @@ source:
       - patches/0003-Force-usage-of-python-3-and-error-without-numpy.patch
       # https://github.com/pytorch/pytorch/pull/137084
       - patches/0004-Help-find-numpy.patch
+      # https://github.com/pytorch/pytorch/issues/150918 - continue tests on failure due to flaky tests
       - patches/0007-continue-tests-on-failure.patch
       - patches/0008-add-missing-includes.patch
       - patches/0009-use-prefix-include-for-inductor.patch
@@ -455,7 +456,10 @@ outputs:
         # Note that the `|| true` expression will make the build continue even if the whole script falls over completely
         # (for example, in the case of missing imports). There doesn't seem to be a way of making a script exception return
         # non-zero but failing tests return zero.
-        - python ./test/run_test.py --core --continue-through-error || true
+        # ------------------------------------------------------------------------------------------------
+        # Exclude complex tests that are known to be flaky for -k "not (complex and (linalg_vecdot or dot or vdot))"
+        # https://github.com/pytorch/pytorch/issues/150918
+        - python ./test/run_test.py --core --continue-through-error -k "not (complex and (linalg_vecdot or dot or vdot))" || true
         # The inductor tests test the torch.compile backend. Using the options below avoids running distributed tests,
         # which would be run if we used the --inductor option. (Distributed tests would only be correctly run on a multi-gpu test platform,
         # which we don't have.)
diff --git a/recipe/patches/0007-continue-tests-on-failure.patch b/recipe/patches/0007-continue-tests-on-failure.patch
index 161fc8ce..8e8ea31e 100644
--- a/recipe/patches/0007-continue-tests-on-failure.patch
+++ b/recipe/patches/0007-continue-tests-on-failure.patch
@@ -1,3 +1,26 @@
+
+From: Jamie Robertson <jrobertson@anaconda.com>
+Date: Web, 17 Oct 2025 00:28:40 -0000
+Subject: [PATCH] Continue tests on failure due to flaky tests
+
+This is a workaround to allow the build to continue even if some tests fail due to flaky tests.
+---
+Index: pytorch/test/run_test.py
+===================================================================
+diff --git test/run_test.py test/run_test.py
+index a508d8d..02d9af9 100755
+--- a/test/run_test.py
++++ b/test/run_test.py
+@@ -1384,7 +1384,7 @@ def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
+     else:
+         # When under the normal mode, retry a failed test 2 more times. -x means stop at the first
+         # failure
+-        rerun_options = ["-x", "--reruns=2"]
++        rerun_options = ["--reruns=2"]
+ 
+     pytest_args = [
+         "-vv",
+---
 Index: pytorch/test/run_test.py
 ===================================================================
 diff --git test/run_test.py test/run_test.py

From 2dbe6b3915690e9e78d663eaf60ab19b5330bf5e Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 18 Sep 2025 09:03:35 +0100
Subject: [PATCH 05/32] megabuild false

---
 recipe/conda_build_config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index 88667118..802a3447 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -27,8 +27,8 @@ zip_keys:                    # [(osx and arm64)]
 # Conda-forge didn't do a "megabuild" on osx because it pushed their CI runners over their 6-hour limit. We don't have
 # such a limit.
 megabuild:
-- true
-#- false     # [osx]
+# - true
+- false
 
 # The version of python to use when building libtorch in a "megabuild"
 megabuild_python:

From 4b1bbe365baf4006eaa23fecd94a3fd92284825e Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 18 Sep 2025 09:37:09 +0100
Subject: [PATCH 06/32] remove header

---
 recipe/patches/0007-continue-tests-on-failure.patch | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/recipe/patches/0007-continue-tests-on-failure.patch b/recipe/patches/0007-continue-tests-on-failure.patch
index 8e8ea31e..6e0f91ed 100644
--- a/recipe/patches/0007-continue-tests-on-failure.patch
+++ b/recipe/patches/0007-continue-tests-on-failure.patch
@@ -1,10 +1,3 @@
-
-From: Jamie Robertson <jrobertson@anaconda.com>
-Date: Web, 17 Oct 2025 00:28:40 -0000
-Subject: [PATCH] Continue tests on failure due to flaky tests
-
-This is a workaround to allow the build to continue even if some tests fail due to flaky tests.
----
 Index: pytorch/test/run_test.py
 ===================================================================
 diff --git test/run_test.py test/run_test.py

From 64a7f3a0d493c497dbcf23db1df8e60569302f29 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 18 Sep 2025 09:53:54 +0100
Subject: [PATCH 07/32] duplicate patch deleted

---
 .../patches/0007-continue-tests-on-failure.patch | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/recipe/patches/0007-continue-tests-on-failure.patch b/recipe/patches/0007-continue-tests-on-failure.patch
index 6e0f91ed..161fc8ce 100644
--- a/recipe/patches/0007-continue-tests-on-failure.patch
+++ b/recipe/patches/0007-continue-tests-on-failure.patch
@@ -13,19 +13,3 @@ index a508d8d..02d9af9 100755
  
      pytest_args = [
          "-vv",
----
-Index: pytorch/test/run_test.py
-===================================================================
-diff --git test/run_test.py test/run_test.py
-index a508d8d..02d9af9 100755
---- a/test/run_test.py
-+++ b/test/run_test.py
-@@ -1384,7 +1384,7 @@ def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
-     else:
-         # When under the normal mode, retry a failed test 2 more times. -x means stop at the first
-         # failure
--        rerun_options = ["-x", "--reruns=2"]
-+        rerun_options = ["--reruns=2"]
- 
-     pytest_args = [
-         "-vv",

From 2471496bcfe880dcd547d5653f8ac31d7c199098 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 23 Sep 2025 09:54:01 +0100
Subject: [PATCH 08/32] remove abs logic and remove cuda depend explicitly as
 it is getting a conflict

---
 abs.yaml                       | 3 ---
 recipe/conda_build_config.yaml | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/abs.yaml b/abs.yaml
index c317de07..0756ca38 100644
--- a/abs.yaml
+++ b/abs.yaml
@@ -1,6 +1,3 @@
-build_env_vars:
-  ANACONDA_ROCKET_ENABLE_PY313 : yes
-  
 # macOS 12.3 or above is required for running the GPU variant (MPS support). No way to specify this for only the GPU
 # variant, so it's specified for both.
 extra_labels_for_os:
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index 802a3447..5a46edf1 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -1,7 +1,7 @@
 gpu_variant:
   - cpu
   - metal                    # [(osx and arm64)]
-  - cuda-12                  # [(linux and x86_64)]
+#  - cuda-12                  # [(linux and x86_64)]
 c_compiler_version:      # [osx]
   - 17                   # [osx]
 cxx_compiler_version:    # [osx]

From 46ffceb683e0ff5e82092c7249d84a03b30c5382 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Mon, 29 Sep 2025 09:41:32 +0100
Subject: [PATCH 09/32] stdlibc for openblas compat

---
 recipe/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 22b9bc03..96cba4ca 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -91,7 +91,7 @@ requirements:
     - cross-python_{{ target_platform }}     # [build_platform != target_platform]
     - numpy  *                               # [megabuild and build_platform != target_platform]
     - numpy                                  # [not megabuild and build_platform != target_platform]
-    #- {{ stdlib('c') }}
+    - {{ stdlib('c') }}
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
     - {{ compiler('cuda') }}                 # [(gpu_variant or "").startswith("cuda")]
@@ -265,7 +265,7 @@ outputs:
         - python                                 # [build_platform != target_platform]
         - cross-python_{{ target_platform }}     # [build_platform != target_platform]
         - numpy                                  # [build_platform != target_platform]
-        #- {{ stdlib('c') }}
+        - {{ stdlib('c') }}
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
         - {{ compiler('cuda') }}                 # [(gpu_variant or "").startswith("cuda")]

From f5833339ea324f0682879b66e146fe223de3e363 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 30 Sep 2025 09:38:53 +0100
Subject: [PATCH 10/32] mkl missmatch patch from pytorch repo

---
 recipe/meta.yaml                              |   1 +
 .../patches/0017-mkl-version-mismatch.patch   | 103 ++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 recipe/patches/0017-mkl-version-mismatch.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 96cba4ca..e55004d8 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -58,6 +58,7 @@ source:
       - patches/0014-point-include-paths-to-PREFIX-include.patch
       - patches/0015-point-lib-paths-to-PREFIX-lib.patch
       - patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
+      - patches/0017-mkl-version-mismatch.patch
 {% endif %}
   - url: https://raw.githubusercontent.com/pytorch/pytorch/{{ smoke_test_commit }}/.ci/pytorch/smoke_test/smoke_test.py
     folder: smoke_test
diff --git a/recipe/patches/0017-mkl-version-mismatch.patch b/recipe/patches/0017-mkl-version-mismatch.patch
new file mode 100644
index 00000000..1e956ea6
--- /dev/null
+++ b/recipe/patches/0017-mkl-version-mismatch.patch
@@ -0,0 +1,103 @@
+From a3ece7552117377895304744892fc4100656fd1e Mon Sep 17 00:00:00 2001
+From: CaoE <e.cao@intel.com>
+Date: Wed, 28 May 2025 20:04:24 -0700
+Subject: [PATCH 1/3] Update
+
+[ghstack-poisoned]
+---
+ aten/src/ATen/native/mkl/SpectralOps.cpp | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
+index 8deefaade89c..917bd31f6eb9 100644
+--- a/aten/src/ATen/native/mkl/SpectralOps.cpp
++++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
+@@ -479,6 +479,16 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
+   const auto value_type = c10::toRealValueType(input.scalar_type());
+   out.resize_(batched_out_sizes, MemoryFormat::Contiguous);
+ 
++  // fix mkl issue
++  // https://github.com/pytorch/pytorch/issues/154477
++  auto istrides = input.strides();
++  for (const auto& stride : istrides) {
++    if (stride == 0) {
++      input = input.clone(MemoryFormat::Contiguous);
++      break;
++    }
++  }
++
+   auto descriptor = _plan_mkl_fft(
+       input.strides(), out.strides(), signal_size, input.is_complex(),
+       out.is_complex(), normalization, forward, value_type);
+
+From fe2e49da9a6b344c4cfa98b22b597ac5f4f81017 Mon Sep 17 00:00:00 2001
+From: CaoE <e.cao@intel.com>
+Date: Mon, 9 Jun 2025 22:08:12 -0700
+Subject: [PATCH 2/3] Update
+
+[ghstack-poisoned]
+---
+ aten/src/ATen/native/mkl/SpectralOps.cpp | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
+index 6864e2f9b418..979d1e7b1ae0 100644
+--- a/aten/src/ATen/native/mkl/SpectralOps.cpp
++++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
+@@ -481,8 +481,12 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
+ 
+   // fix mkl issue
+   // https://github.com/pytorch/pytorch/issues/154477
+-  if (signal_size[0] > 1 && input.strides()[0] == 0) {
+-    input = input.clone(MemoryFormat::Contiguous);
++  auto istrides = input.strides();
++  for (const auto& stride : istrides) {
++    if (stride == 0) {
++      input = input.clone(MemoryFormat::Contiguous);
++      break;
++    }
+   }
+ 
+   auto descriptor = _plan_mkl_fft(
+
+From ffa596fd9d1265883c2d76f637b5ea362c9f2922 Mon Sep 17 00:00:00 2001
+From: CaoE <e.cao@intel.com>
+Date: Tue, 8 Jul 2025 19:30:20 -0700
+Subject: [PATCH 3/3] Update
+
+[ghstack-poisoned]
+---
+ aten/src/ATen/native/mkl/SpectralOps.cpp | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
+index 979d1e7b1ae0..4aa53c5e794b 100644
+--- a/aten/src/ATen/native/mkl/SpectralOps.cpp
++++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
+@@ -337,6 +337,7 @@ Tensor _fft_c2c_mkl(const Tensor& self, IntArrayRef dim, int64_t normalization,
+ #include <cmath>
+ 
+ #include <mkl_dfti.h>
++#include <mkl_version.h>
+ #include <ATen/mkl/Exceptions.h>
+ #include <ATen/mkl/Descriptors.h>
+ #include <ATen/mkl/Limits.h>
+@@ -481,13 +482,16 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
+ 
+   // fix mkl issue
+   // https://github.com/pytorch/pytorch/issues/154477
+-  auto istrides = input.strides();
+-  for (const auto& stride : istrides) {
++#ifdef INTEL_MKL_VERSION
++#if INTEL_MKL_VERSION > 20210400L
++  for (const auto& stride : input.strides()) {
+     if (stride == 0) {
+       input = input.clone(MemoryFormat::Contiguous);
+       break;
+     }
+   }
++#endif
++#endif
+ 
+   auto descriptor = _plan_mkl_fft(
+       input.strides(), out.strides(), signal_size, input.is_complex(),
\ No newline at end of file

From cdc5b39105aed0d3f18769f63b2fc9eeed3f2c7a Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 30 Sep 2025 10:29:20 +0100
Subject: [PATCH 11/32] simplify patch

---
 .../patches/0017-mkl-version-mismatch.patch   | 112 ++++--------------
 1 file changed, 22 insertions(+), 90 deletions(-)

diff --git a/recipe/patches/0017-mkl-version-mismatch.patch b/recipe/patches/0017-mkl-version-mismatch.patch
index 1e956ea6..a50de9c1 100644
--- a/recipe/patches/0017-mkl-version-mismatch.patch
+++ b/recipe/patches/0017-mkl-version-mismatch.patch
@@ -1,103 +1,35 @@
-From a3ece7552117377895304744892fc4100656fd1e Mon Sep 17 00:00:00 2001
-From: CaoE <e.cao@intel.com>
-Date: Wed, 28 May 2025 20:04:24 -0700
-Subject: [PATCH 1/3] Update
+From 714ead5bf5c7e7ac0f91934232af2e1966b562fb Mon Sep 17 00:00:00 2001
+From: "Zheng, Zhaoqiong" <zhaoqiong.zheng@intel.com>
+Date: Fri, 27 Dec 2024 13:49:36 +0800
+Subject: [PATCH] fix issue 142484
 
-[ghstack-poisoned]
+From https://github.com/pytorch/pytorch/pull/143894
 ---
- aten/src/ATen/native/mkl/SpectralOps.cpp | 10 ++++++++++
- 1 file changed, 10 insertions(+)
+ aten/src/ATen/native/mkl/SpectralOps.cpp | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
 
 diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
-index 8deefaade89c..917bd31f6eb9 100644
+index e26cfbf6d..c61b76d32 100644
 --- a/aten/src/ATen/native/mkl/SpectralOps.cpp
 +++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
-@@ -479,6 +479,16 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
+@@ -477,7 +477,17 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
+ 
    const auto value_type = c10::toRealValueType(input.scalar_type());
    out.resize_(batched_out_sizes, MemoryFormat::Contiguous);
- 
-+  // fix mkl issue
-+  // https://github.com/pytorch/pytorch/issues/154477
-+  auto istrides = input.strides();
-+  for (const auto& stride : istrides) {
-+    if (stride == 0) {
+-
++  auto astrides = input.strides();
++  bool all_zero = true;
++  for (const auto& stride : astrides) {
++      if (stride != 0) {
++          all_zero = false;
++          break;
++      }
++  }
++  if (all_zero) {
 +      input = input.clone(MemoryFormat::Contiguous);
-+      break;
-+    }
 +  }
-+
    auto descriptor = _plan_mkl_fft(
        input.strides(), out.strides(), signal_size, input.is_complex(),
        out.is_complex(), normalization, forward, value_type);
-
-From fe2e49da9a6b344c4cfa98b22b597ac5f4f81017 Mon Sep 17 00:00:00 2001
-From: CaoE <e.cao@intel.com>
-Date: Mon, 9 Jun 2025 22:08:12 -0700
-Subject: [PATCH 2/3] Update
-
-[ghstack-poisoned]
----
- aten/src/ATen/native/mkl/SpectralOps.cpp | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
-index 6864e2f9b418..979d1e7b1ae0 100644
---- a/aten/src/ATen/native/mkl/SpectralOps.cpp
-+++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
-@@ -481,8 +481,12 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
- 
-   // fix mkl issue
-   // https://github.com/pytorch/pytorch/issues/154477
--  if (signal_size[0] > 1 && input.strides()[0] == 0) {
--    input = input.clone(MemoryFormat::Contiguous);
-+  auto istrides = input.strides();
-+  for (const auto& stride : istrides) {
-+    if (stride == 0) {
-+      input = input.clone(MemoryFormat::Contiguous);
-+      break;
-+    }
-   }
- 
-   auto descriptor = _plan_mkl_fft(
-
-From ffa596fd9d1265883c2d76f637b5ea362c9f2922 Mon Sep 17 00:00:00 2001
-From: CaoE <e.cao@intel.com>
-Date: Tue, 8 Jul 2025 19:30:20 -0700
-Subject: [PATCH 3/3] Update
-
-[ghstack-poisoned]
----
- aten/src/ATen/native/mkl/SpectralOps.cpp | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
-index 979d1e7b1ae0..4aa53c5e794b 100644
---- a/aten/src/ATen/native/mkl/SpectralOps.cpp
-+++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
-@@ -337,6 +337,7 @@ Tensor _fft_c2c_mkl(const Tensor& self, IntArrayRef dim, int64_t normalization,
- #include <cmath>
- 
- #include <mkl_dfti.h>
-+#include <mkl_version.h>
- #include <ATen/mkl/Exceptions.h>
- #include <ATen/mkl/Descriptors.h>
- #include <ATen/mkl/Limits.h>
-@@ -481,13 +482,16 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
- 
-   // fix mkl issue
-   // https://github.com/pytorch/pytorch/issues/154477
--  auto istrides = input.strides();
--  for (const auto& stride : istrides) {
-+#ifdef INTEL_MKL_VERSION
-+#if INTEL_MKL_VERSION > 20210400L
-+  for (const auto& stride : input.strides()) {
-     if (stride == 0) {
-       input = input.clone(MemoryFormat::Contiguous);
-       break;
-     }
-   }
-+#endif
-+#endif
- 
-   auto descriptor = _plan_mkl_fft(
-       input.strides(), out.strides(), signal_size, input.is_complex(),
\ No newline at end of file
+-- 
+2.47.1
\ No newline at end of file

From cc762364325c5d3bef021fbc460b399e27dab1e9 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 2 Oct 2025 16:07:13 +0100
Subject: [PATCH 12/32] skip test patch for windows testing

---
 recipe/meta.yaml               |  3 ++-
 recipe/patches/skip-test.patch | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 recipe/patches/skip-test.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index e55004d8..3eee0366 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -59,6 +59,7 @@ source:
       - patches/0015-point-lib-paths-to-PREFIX-lib.patch
       - patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
       - patches/0017-mkl-version-mismatch.patch
+      - patches/skip-test.patch
 {% endif %}
   - url: https://raw.githubusercontent.com/pytorch/pytorch/{{ smoke_test_commit }}/.ci/pytorch/smoke_test/smoke_test.py
     folder: smoke_test
@@ -436,7 +437,7 @@ outputs:
         - set MATRIX_STABLE_VERSION={{ version }}                                               # [win]
         - set MATRIX_PACKAGE_TYPE="conda"                                                       # [win]
         - set TARGET_OS="windows"                                                               # [win]
-        - set OMP_NUM_THREADS=4                                                                 # [win]
+        - set OMP_NUM_THREADS=1                                                                 # [win]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cudatoolkit.split('.')[:2]) }}"           # [(gpu_variant == "cuda-11") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(gpu_variant == "cuda-12") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ MACOSX_SDK_VERSION }}"                             # [(gpu_variant == "metal")]
diff --git a/recipe/patches/skip-test.patch b/recipe/patches/skip-test.patch
new file mode 100644
index 00000000..5da3e25c
--- /dev/null
+++ b/recipe/patches/skip-test.patch
@@ -0,0 +1,32 @@
+--- a/test/run_test.py
++++ b/test/run_test.py
+@@ -594,14 +594,8 @@ ONNX_SERIAL_LIST = [
+ 
+ # A subset of our TEST list that validates PyTorch's ops, modules, and autograd function as expected
+ CORE_TEST_LIST = [
+-    "test_autograd",
+-    "test_autograd_fallback",
+-    "test_modules",
+-    "test_nn",
+     "test_ops",
+     "test_ops_gradients",
+-    "test_ops_fwd_gradients",
+-    "test_ops_jit",
+-    "test_torch",
+ ]
+ 
+ 
+@@ -1588,10 +1582,10 @@ def parse_args():
+         "-i",
+         "--include",
+         nargs="+",
+         choices=TestChoices(TESTS),
+-        default=TESTS,
++        default=CORE_TEST_LIST,
+         metavar="TESTS",
+-        help="select a set of tests to include (defaults to ALL tests)."
++        help="select a set of tests to include (defaults to CORE_TEST_LIST: test_ops and test_ops_gradients)."
+         " tests must be a part of the TESTS list defined in run_test.py",
+     )
+     parser.add_argument(
+

From 0cb7f439df377774feeaf635eefe87e2683c7712 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 2 Oct 2025 16:09:33 +0100
Subject: [PATCH 13/32] mkl_num_threads in test

---
 recipe/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 3eee0366..b3d22339 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -438,6 +438,7 @@ outputs:
         - set MATRIX_PACKAGE_TYPE="conda"                                                       # [win]
         - set TARGET_OS="windows"                                                               # [win]
         - set OMP_NUM_THREADS=1                                                                 # [win]
+        - set MKL_NUM_THREADS=1                                                                 # [win]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cudatoolkit.split('.')[:2]) }}"           # [(gpu_variant == "cuda-11") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(gpu_variant == "cuda-12") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ MACOSX_SDK_VERSION }}"                             # [(gpu_variant == "metal")]

From ac7f8210c23198f06184c85d356d11d5d90f6725 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 2 Oct 2025 16:21:56 +0100
Subject: [PATCH 14/32] regenerate patch

---
 recipe/patches/skip-test.patch | 67 +++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/recipe/patches/skip-test.patch b/recipe/patches/skip-test.patch
index 5da3e25c..ccd146ff 100644
--- a/recipe/patches/skip-test.patch
+++ b/recipe/patches/skip-test.patch
@@ -1,6 +1,63 @@
---- a/test/run_test.py
-+++ b/test/run_test.py
-@@ -594,14 +594,8 @@ ONNX_SERIAL_LIST = [
+--- test/run_test.py	2025-10-02 13:39:34.543337955 +0100
++++ "test/run_test copy.py"	2025-10-02 16:02:58.515409486 +0100
+@@ -557,31 +557,31 @@
+ # Test files that should always be run serially with other test files,
+ # but it's okay if the tests inside them are run in parallel with each other.
+ CI_SERIAL_LIST = [
+-    "test_nn",
+-    "test_fake_tensor",
+-    "test_cpp_api_parity",
+-    "test_reductions",
+-    "test_fx_backends",
+-    "test_cpp_extensions_jit",
+-    "test_torch",
+-    "test_tensor_creation_ops",
+-    "test_dispatch",
+-    "test_python_dispatch",  # torch.library creation and deletion must be serialized
+-    "test_spectral_ops",  # Cause CUDA illegal memory access https://github.com/pytorch/pytorch/issues/88916
+-    "nn/test_pooling",
+-    "nn/test_convolution",  # Doesn't respect set_per_process_memory_fraction, results in OOM for other tests in slow gradcheck
+-    "distributions/test_distributions",
+-    "test_fx",  # gets SIGKILL
+-    "functorch/test_memory_efficient_fusion",  # Cause CUDA OOM on ROCm
+-    "test_utils",  # OOM
+-    "test_sort_and_select",  # OOM
+-    "test_backward_compatible_arguments",  # OOM
+-    "test_autocast",  # OOM
+-    "test_native_mha",  # OOM
+-    "test_module_hooks",  # OOM
+-    "inductor/test_max_autotune",
+-    "inductor/test_cutlass_backend",  # slow due to many nvcc compilation steps,
+-    "inductor/test_flex_attention",  # OOM
++    #"test_nn",
++    #"test_fake_tensor", 
++    #"test_cpp_api_parity",
++    #"test_reductions",
++    #"test_fx_backends",
++    #"test_cpp_extensions_jit",
++    #"test_torch",
++    #"test_tensor_creation_ops",
++    #"test_dispatch",
++    #"test_python_dispatch",  # torch.library creation and deletion must be serialized
++    #"test_spectral_ops",  # Cause CUDA illegal memory access https://github.com/pytorch/pytorch/issues/88916
++    #"nn/test_pooling",
++    #"nn/test_convolution",  # Doesn't respect set_per_process_memory_fraction, results in OOM for other tests in slow gradcheck
++    #"distributions/test_distributions",
++    #"test_fx",  # gets SIGKILL
++    #"functorch/test_memory_efficient_fusion",  # Cause CUDA OOM on ROCm
++    #"test_utils",  # OOM
++    #"test_sort_and_select",  # OOM
++    #"test_backward_compatible_arguments",  # OOM
++    #"test_autocast",  # OOM
++    #"test_native_mha",  # OOM
++    #"test_module_hooks",  # OOM
++    #"inductor/test_max_autotune",
++    #"inductor/test_cutlass_backend",  # slow due to many nvcc compilation steps,
++    #"inductor/test_flex_attention",  # OOM
+ ]
+ # A subset of onnx tests that cannot run in parallel due to high memory usage.
+ ONNX_SERIAL_LIST = [
+@@ -594,15 +594,8 @@
  
  # A subset of our TEST list that validates PyTorch's ops, modules, and autograd function as expected
  CORE_TEST_LIST = [
@@ -16,8 +73,7 @@
  ]
  
  
-@@ -1588,10 +1582,10 @@ def parse_args():
-         "-i",
+@@ -1588,9 +1581,9 @@
          "--include",
          nargs="+",
          choices=TestChoices(TESTS),
@@ -29,4 +85,3 @@
          " tests must be a part of the TESTS list defined in run_test.py",
      )
      parser.add_argument(
-

From ee0864e84d678d3a4c45b0dee2d6f82afb2980b4 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 2 Oct 2025 16:39:22 +0100
Subject: [PATCH 15/32] remove patch due to windows mismatch

---
 recipe/meta.yaml               |  1 -
 recipe/patches/skip-test.patch | 87 ----------------------------------
 2 files changed, 88 deletions(-)
 delete mode 100644 recipe/patches/skip-test.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index b3d22339..32bc1f84 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -59,7 +59,6 @@ source:
       - patches/0015-point-lib-paths-to-PREFIX-lib.patch
       - patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
       - patches/0017-mkl-version-mismatch.patch
-      - patches/skip-test.patch
 {% endif %}
   - url: https://raw.githubusercontent.com/pytorch/pytorch/{{ smoke_test_commit }}/.ci/pytorch/smoke_test/smoke_test.py
     folder: smoke_test
diff --git a/recipe/patches/skip-test.patch b/recipe/patches/skip-test.patch
deleted file mode 100644
index ccd146ff..00000000
--- a/recipe/patches/skip-test.patch
+++ /dev/null
@@ -1,87 +0,0 @@
---- test/run_test.py	2025-10-02 13:39:34.543337955 +0100
-+++ "test/run_test copy.py"	2025-10-02 16:02:58.515409486 +0100
-@@ -557,31 +557,31 @@
- # Test files that should always be run serially with other test files,
- # but it's okay if the tests inside them are run in parallel with each other.
- CI_SERIAL_LIST = [
--    "test_nn",
--    "test_fake_tensor",
--    "test_cpp_api_parity",
--    "test_reductions",
--    "test_fx_backends",
--    "test_cpp_extensions_jit",
--    "test_torch",
--    "test_tensor_creation_ops",
--    "test_dispatch",
--    "test_python_dispatch",  # torch.library creation and deletion must be serialized
--    "test_spectral_ops",  # Cause CUDA illegal memory access https://github.com/pytorch/pytorch/issues/88916
--    "nn/test_pooling",
--    "nn/test_convolution",  # Doesn't respect set_per_process_memory_fraction, results in OOM for other tests in slow gradcheck
--    "distributions/test_distributions",
--    "test_fx",  # gets SIGKILL
--    "functorch/test_memory_efficient_fusion",  # Cause CUDA OOM on ROCm
--    "test_utils",  # OOM
--    "test_sort_and_select",  # OOM
--    "test_backward_compatible_arguments",  # OOM
--    "test_autocast",  # OOM
--    "test_native_mha",  # OOM
--    "test_module_hooks",  # OOM
--    "inductor/test_max_autotune",
--    "inductor/test_cutlass_backend",  # slow due to many nvcc compilation steps,
--    "inductor/test_flex_attention",  # OOM
-+    #"test_nn",
-+    #"test_fake_tensor", 
-+    #"test_cpp_api_parity",
-+    #"test_reductions",
-+    #"test_fx_backends",
-+    #"test_cpp_extensions_jit",
-+    #"test_torch",
-+    #"test_tensor_creation_ops",
-+    #"test_dispatch",
-+    #"test_python_dispatch",  # torch.library creation and deletion must be serialized
-+    #"test_spectral_ops",  # Cause CUDA illegal memory access https://github.com/pytorch/pytorch/issues/88916
-+    #"nn/test_pooling",
-+    #"nn/test_convolution",  # Doesn't respect set_per_process_memory_fraction, results in OOM for other tests in slow gradcheck
-+    #"distributions/test_distributions",
-+    #"test_fx",  # gets SIGKILL
-+    #"functorch/test_memory_efficient_fusion",  # Cause CUDA OOM on ROCm
-+    #"test_utils",  # OOM
-+    #"test_sort_and_select",  # OOM
-+    #"test_backward_compatible_arguments",  # OOM
-+    #"test_autocast",  # OOM
-+    #"test_native_mha",  # OOM
-+    #"test_module_hooks",  # OOM
-+    #"inductor/test_max_autotune",
-+    #"inductor/test_cutlass_backend",  # slow due to many nvcc compilation steps,
-+    #"inductor/test_flex_attention",  # OOM
- ]
- # A subset of onnx tests that cannot run in parallel due to high memory usage.
- ONNX_SERIAL_LIST = [
-@@ -594,15 +594,8 @@
- 
- # A subset of our TEST list that validates PyTorch's ops, modules, and autograd function as expected
- CORE_TEST_LIST = [
--    "test_autograd",
--    "test_autograd_fallback",
--    "test_modules",
--    "test_nn",
-     "test_ops",
-     "test_ops_gradients",
--    "test_ops_fwd_gradients",
--    "test_ops_jit",
--    "test_torch",
- ]
- 
- 
-@@ -1588,9 +1581,9 @@
-         "--include",
-         nargs="+",
-         choices=TestChoices(TESTS),
--        default=TESTS,
-+        default=CORE_TEST_LIST,
-         metavar="TESTS",
--        help="select a set of tests to include (defaults to ALL tests)."
-+        help="select a set of tests to include (defaults to CORE_TEST_LIST: test_ops and test_ops_gradients)."
-         " tests must be a part of the TESTS list defined in run_test.py",
-     )
-     parser.add_argument(

From ba8ea8bfb98eb11d677b03366ca38edd835039b8 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Fri, 3 Oct 2025 08:28:46 +0100
Subject: [PATCH 16/32] add dynamic flag

---
 recipe/meta.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 32bc1f84..51e99075 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -438,6 +438,8 @@ outputs:
         - set TARGET_OS="windows"                                                               # [win]
         - set OMP_NUM_THREADS=1                                                                 # [win]
         - set MKL_NUM_THREADS=1                                                                 # [win]
+        - set OMP_DYNAMIC=FALSE                                                                 # [win]
+        - set MKL_DYNAMIC=FALSE                                                                 # [win]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cudatoolkit.split('.')[:2]) }}"           # [(gpu_variant == "cuda-11") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(gpu_variant == "cuda-12") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ MACOSX_SDK_VERSION }}"                             # [(gpu_variant == "metal")]

From b25bdd002c910501107808957d4c34cd6d81ed25 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 9 Oct 2025 08:20:43 +0100
Subject: [PATCH 17/32] set mkl to 2023 compatibility mode with flags

---
 recipe/meta.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 51e99075..3791a9ce 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -439,7 +439,10 @@ outputs:
         - set OMP_NUM_THREADS=1                                                                 # [win]
         - set MKL_NUM_THREADS=1                                                                 # [win]
         - set OMP_DYNAMIC=FALSE                                                                 # [win]
-        - set MKL_DYNAMIC=FALSE                                                                 # [win]
+        - set MKL_ENABLE_INSTRUCTIONS=AVX2                                                        # [win]
+        - set MKL_VML_MODE=HA                                                                     # [win]
+        - set KMP_DETERMINISTIC_REDUCTION=TRUE                                                    # [win]
+        - set MKL_CBWR=COMPATIBLE                                                              # [win]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cudatoolkit.split('.')[:2]) }}"           # [(gpu_variant == "cuda-11") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(gpu_variant == "cuda-12") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ MACOSX_SDK_VERSION }}"                             # [(gpu_variant == "metal")]

From fc31816b08d82a6edff02b4afadbfb22496868fa Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Mon, 20 Oct 2025 13:02:58 +0100
Subject: [PATCH 18/32] ensure we use entire build target cpu & skip windows ci
 test fails

---
 recipe/bld.bat   |  2 +-
 recipe/meta.yaml | 19 ++++++-------------
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/recipe/bld.bat b/recipe/bld.bat
index a076bd7e..11db1390 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -51,7 +51,7 @@ set DISTUTILS_USE_SDK=1
 set BUILD_TEST=0
 set INSTALL_TEST=0
 :: Don't increase MAX_JOBS to NUMBER_OF_PROCESSORS, as it will run out of heap
-set CPU_COUNT=1
+set CPU_COUNT=4
 set MAX_JOBS=%CPU_COUNT%
 :: Use our Pybind11, Eigen
 set USE_SYSTEM_PYBIND11=1
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 3791a9ce..f3a9d669 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -163,7 +163,7 @@ requirements:
     - requests
     - future  # [py<313]
     - six
-    - mkl-devel {{ mkl }}           # [blas_impl == "mkl"]
+    - mkl-devel 2023.*          # [blas_impl == "mkl"]
     - openblas-devel {{ openblas }}   # [blas_impl == "openblas"]
     # - libcblas * *_mkl      # [blas_impl == "mkl"]
     # - libcblas              # [blas_impl != "mkl"]
@@ -172,7 +172,7 @@ requirements:
     # We pull in the same versions of mkl and intel-openmp: intel aligns the versions
     # We use intel-openmp for all mkl variants.
     # For openblas on win and linux, we don't specify any openmp implementation; it comes from the compiler.
-    - intel-openmp   {{ mkl }}        # [blas_impl == "mkl"]
+    - intel-openmp   2023.*         # [blas_impl == "mkl"]
     - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
     - libabseil
     - libprotobuf {{ libprotobuf }}
@@ -232,7 +232,6 @@ outputs:
         - "**/shm.dll"                # [win]
         - "**/torch_cpu.dll"          # [win]
         - "**/torch_python.dll"       # [win]
-        - $RPATH/ld64.so.1  # [s390x]
         # libcuda.so is the cuda driver API library and is a system library.
         - "**/libcuda.so*"            # [(gpu_variant or "").startswith("cuda")]
   - name: pytorch
@@ -252,7 +251,6 @@ outputs:
         - "**/shm.dll"                # [win]
         - "**/torch_cpu.dll"          # [win]
         - "**/torch_python.dll"       # [win]
-        - $RPATH/ld64.so.1  # [s390x]
       detect_binary_files_with_prefix: false
       run_exports:
         - {{ pin_subpackage('pytorch', max_pin='x.x') }}
@@ -332,7 +330,7 @@ outputs:
         - requests
         - future  # [py<313]
         - six
-        - mkl-devel {{ mkl }}           # [blas_impl == "mkl"]
+        - mkl-devel 2023.*           # [blas_impl == "mkl"]
         - openblas-devel {{ openblas }}   # [blas_impl == "openblas"]
         # - libcblas * *_mkl      # [blas_impl == "mkl"]
         # - libcblas              # [blas_impl != "mkl"]
@@ -341,7 +339,7 @@ outputs:
         # We pull in the same versions of mkl and intel-openmp: intel aligns the versions
         # We use intel-openmp for all mkl variants.
         # For openblas on win and linux, we don't specify any openmp implementation; it comes from the compiler.
-        - intel-openmp   {{ mkl }}        # [blas_impl == "mkl"]
+        - intel-openmp 2023.*         # [blas_impl == "mkl"]
         - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
         - libabseil
         - libprotobuf {{ libprotobuf }}
@@ -437,12 +435,6 @@ outputs:
         - set MATRIX_PACKAGE_TYPE="conda"                                                       # [win]
         - set TARGET_OS="windows"                                                               # [win]
         - set OMP_NUM_THREADS=1                                                                 # [win]
-        - set MKL_NUM_THREADS=1                                                                 # [win]
-        - set OMP_DYNAMIC=FALSE                                                                 # [win]
-        - set MKL_ENABLE_INSTRUCTIONS=AVX2                                                        # [win]
-        - set MKL_VML_MODE=HA                                                                     # [win]
-        - set KMP_DETERMINISTIC_REDUCTION=TRUE                                                    # [win]
-        - set MKL_CBWR=COMPATIBLE                                                              # [win]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cudatoolkit.split('.')[:2]) }}"           # [(gpu_variant == "cuda-11") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(gpu_variant == "cuda-12") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ MACOSX_SDK_VERSION }}"                             # [(gpu_variant == "metal")]
@@ -466,7 +458,8 @@ outputs:
         # ------------------------------------------------------------------------------------------------
         # Exclude complex tests that are known to be flaky for -k "not (complex and (linalg_vecdot or dot or vdot))"
         # https://github.com/pytorch/pytorch/issues/150918
-        - python ./test/run_test.py --core --continue-through-error -k "not (complex and (linalg_vecdot or dot or vdot))" || true
+        - python ./test/run_test.py --core --continue-through-error -k "not (complex and (linalg_vecdot or dot or vdot))" || true # [not win]
+        - python ./test/run_test.py --core --continue-through-error -k "not ((complex and (linalg_vecdot or dot or vdot)) or lgamma or mvlgamma or multigammaln or gammaln)" || exit 0
         # The inductor tests test the torch.compile backend. Using the options below avoids running distributed tests,
         # which would be run if we used the --inductor option. (Distributed tests would only be correctly run on a multi-gpu test platform,
         # which we don't have.)

From 8fa736e9d67b7969f9b94e9343dbaf92a234fb93 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 21 Oct 2025 10:59:03 +0100
Subject: [PATCH 19/32] ensure comments and improve threads for windows

---
 recipe/meta.yaml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index f3a9d669..a5cd587f 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -339,7 +339,7 @@ outputs:
         # We pull in the same versions of mkl and intel-openmp: intel aligns the versions
         # We use intel-openmp for all mkl variants.
         # For openblas on win and linux, we don't specify any openmp implementation; it comes from the compiler.
-        - intel-openmp 2023.*         # [blas_impl == "mkl"]
+        - intel-openmp 2025.*         # [blas_impl == "mkl"]
         - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
         - libabseil
         - libprotobuf {{ libprotobuf }}
@@ -434,7 +434,7 @@ outputs:
         - set MATRIX_STABLE_VERSION={{ version }}                                               # [win]
         - set MATRIX_PACKAGE_TYPE="conda"                                                       # [win]
         - set TARGET_OS="windows"                                                               # [win]
-        - set OMP_NUM_THREADS=1                                                                 # [win]
+        - set OMP_NUM_THREADS=4                                                                 # [win]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cudatoolkit.split('.')[:2]) }}"           # [(gpu_variant == "cuda-11") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(gpu_variant == "cuda-12") and (linux and x86_64)]
         - export MATRIX_GPU_ARCH_VERSION="{{ MACOSX_SDK_VERSION }}"                             # [(gpu_variant == "metal")]
@@ -459,7 +459,10 @@ outputs:
         # Exclude complex tests that are known to be flaky for -k "not (complex and (linalg_vecdot or dot or vdot))"
         # https://github.com/pytorch/pytorch/issues/150918
         - python ./test/run_test.py --core --continue-through-error -k "not (complex and (linalg_vecdot or dot or vdot))" || true # [not win]
-        - python ./test/run_test.py --core --continue-through-error -k "not ((complex and (linalg_vecdot or dot or vdot)) or lgamma or mvlgamma or multigammaln or gammaln)" || exit 0
+        # lgamma or mvlgamma or multigammaln or gammaln all have these issues on a combination of Intel Xeon processors and Windows Server differences.
+        # enabling these tests on windows will cause numerical differences in the test suite.
+        # This is a non-deterministic issue where between 80-110 tests fail. This has been observed between Pytorch 2.5 and above.
+        - python ./test/run_test.py --core --continue-through-error -k "not ((complex and (linalg_vecdot or dot or vdot)) or lgamma or mvlgamma or multigammaln or gammaln)" || exit 0 # [win]
         # The inductor tests test the torch.compile backend. Using the options below avoids running distributed tests,
         # which would be run if we used the --inductor option. (Distributed tests would only be correctly run on a multi-gpu test platform,
         # which we don't have.)

From d8eb46d2804350123229f8b2afc1d5592e37aab4 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 21 Oct 2025 12:42:09 +0100
Subject: [PATCH 20/32] ensure 2025 for both

---
 recipe/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index a5cd587f..0c9a640d 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -330,7 +330,7 @@ outputs:
         - requests
         - future  # [py<313]
         - six
-        - mkl-devel 2023.*           # [blas_impl == "mkl"]
+        - mkl-devel 2025.*           # [blas_impl == "mkl"]
         - openblas-devel {{ openblas }}   # [blas_impl == "openblas"]
         # - libcblas * *_mkl      # [blas_impl == "mkl"]
         # - libcblas              # [blas_impl != "mkl"]

From 5f58bb41b6c292e8d877089e21fbb408a32c54c6 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 21 Oct 2025 12:55:25 +0100
Subject: [PATCH 21/32] ensure 2023 version in depends

---
 recipe/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 0c9a640d..c73c4e4e 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -330,7 +330,7 @@ outputs:
         - requests
         - future  # [py<313]
         - six
-        - mkl-devel 2025.*           # [blas_impl == "mkl"]
+        - mkl-devel 2023.*           # [blas_impl == "mkl"]
         - openblas-devel {{ openblas }}   # [blas_impl == "openblas"]
         # - libcblas * *_mkl      # [blas_impl == "mkl"]
         # - libcblas              # [blas_impl != "mkl"]
@@ -339,7 +339,7 @@ outputs:
         # We pull in the same versions of mkl and intel-openmp: intel aligns the versions
         # We use intel-openmp for all mkl variants.
         # For openblas on win and linux, we don't specify any openmp implementation; it comes from the compiler.
-        - intel-openmp 2025.*         # [blas_impl == "mkl"]
+        - intel-openmp 2023.*         # [blas_impl == "mkl"]
         - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
         - libabseil
         - libprotobuf {{ libprotobuf }}

From 3187c34bc1b6e96a281370393d63433cd31fbc02 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 21 Oct 2025 13:13:59 +0100
Subject: [PATCH 22/32] ensure mkl pins

---
 recipe/meta.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index c73c4e4e..31e3ed9a 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -163,7 +163,7 @@ requirements:
     - requests
     - future  # [py<313]
     - six
-    - mkl-devel 2023.*          # [blas_impl == "mkl"]
+    - mkl-devel {{ mkl }}          # [blas_impl == "mkl"]
     - openblas-devel {{ openblas }}   # [blas_impl == "openblas"]
     # - libcblas * *_mkl      # [blas_impl == "mkl"]
     # - libcblas              # [blas_impl != "mkl"]
@@ -172,7 +172,7 @@ requirements:
     # We pull in the same versions of mkl and intel-openmp: intel aligns the versions
     # We use intel-openmp for all mkl variants.
     # For openblas on win and linux, we don't specify any openmp implementation; it comes from the compiler.
-    - intel-openmp   2023.*         # [blas_impl == "mkl"]
+    - intel-openmp {{ mkl }}         # [blas_impl == "mkl"]
     - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
     - libabseil
     - libprotobuf {{ libprotobuf }}
@@ -330,7 +330,7 @@ outputs:
         - requests
         - future  # [py<313]
         - six
-        - mkl-devel 2023.*           # [blas_impl == "mkl"]
+        - mkl-devel {{ mkl }}           # [blas_impl == "mkl"]
         - openblas-devel {{ openblas }}   # [blas_impl == "openblas"]
         # - libcblas * *_mkl      # [blas_impl == "mkl"]
         # - libcblas              # [blas_impl != "mkl"]

From 3ea3033210f450f3b524c024efd4f2527a1858ae Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 21 Oct 2025 19:35:08 +0100
Subject: [PATCH 23/32] last intel 2023 test removed

---
 recipe/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 31e3ed9a..5d47c061 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -330,7 +330,7 @@ outputs:
         - requests
         - future  # [py<313]
         - six
-        - mkl-devel {{ mkl }}           # [blas_impl == "mkl"]
+        - mkl-devel {{ mkl }}             # [blas_impl == "mkl"]
         - openblas-devel {{ openblas }}   # [blas_impl == "openblas"]
         # - libcblas * *_mkl      # [blas_impl == "mkl"]
         # - libcblas              # [blas_impl != "mkl"]
@@ -339,7 +339,7 @@ outputs:
         # We pull in the same versions of mkl and intel-openmp: intel aligns the versions
         # We use intel-openmp for all mkl variants.
         # For openblas on win and linux, we don't specify any openmp implementation; it comes from the compiler.
-        - intel-openmp 2023.*         # [blas_impl == "mkl"]
+        - intel-openmp {{ mkl }}      # [blas_impl == "mkl"]
         - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
         - libabseil
         - libprotobuf {{ libprotobuf }}

From 6103e50edd4ce441f21ad3078a3c045349f029aa Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 21 Oct 2025 19:54:56 +0100
Subject: [PATCH 24/32] update patch:

---
 .../0016-Do-not-check-out-nccl-when-not-building-it.patch       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch b/recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
index 2e0527c2..2398e1ac 100644
--- a/recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
+++ b/recipe/patches/0016-Do-not-check-out-nccl-when-not-building-it.patch
@@ -35,7 +35,7 @@ index 5dd5a2219..2b8b868ea 100644
      my_env = _create_build_env()
 -    checkout_nccl()
 +    if (
-+        not check_negative_env_flag("USE_CUDA")
++        check_env_flag("USE_CUDA")
 +        and not check_negative_env_flag("USE_NCCL")
 +        and not check_env_flag("USE_SYSTEM_NCCL")
 +    ):

From e4969fc6571c90ade175553403305dcf63349da1 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Tue, 21 Oct 2025 21:30:47 +0100
Subject: [PATCH 25/32] formatting

---
 recipe/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 5d47c061..e8ec5ced 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -69,7 +69,7 @@ build:
   number: {{ build }}
   string: gpu_cuda{{ cuda_compiler_version | replace('.', '') }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [gpu_variant == "cuda-12"]
   string: gpu_mps_h{{PKG_HASH}}_{{ PKG_BUILDNUM }}                                                   # [gpu_variant == "metal"]
-  string: cpu_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}                                 # [gpu_variant == "cpu"]
+  string: cpu_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}                                     # [gpu_variant == "cpu"]
   detect_binary_files_with_prefix: false
   run_exports:
     - {{ pin_subpackage('libtorch', max_pin='x.x') }}

From 404a4eff0d180ff42171b77a86bf4a1bb74b1176 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Wed, 22 Oct 2025 16:06:49 +0100
Subject: [PATCH 26/32] ensure c_compiler 20

---
 recipe/conda_build_config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index 5a46edf1..3d7205f7 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -3,9 +3,9 @@ gpu_variant:
   - metal                    # [(osx and arm64)]
 #  - cuda-12                  # [(linux and x86_64)]
 c_compiler_version:      # [osx]
-  - 17                   # [osx]
+  - 20                   # [osx]
 cxx_compiler_version:    # [osx]
-  - 17                   # [osx]
+  - 20                   # [osx]
 # CONDA_BUILD_SYSROOT is defined in the base cbc.yaml, but it's reflected here so we can zip the keys and
 # build GPU and CPU at the same time for osx-arm64. It'll need to be manually updated here if the base cbc is changed.
 # This could be done using extend_keys instead, with a change to the base cbc.yaml.

From bb992fabd3e3a7d2767eadca3a2cdfe36ff2c48b Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Wed, 22 Oct 2025 16:24:20 +0100
Subject: [PATCH 27/32] ensure 17 compat

---
 recipe/conda_build_config.yaml | 4 ++--
 recipe/meta.yaml               | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index 3d7205f7..5a46edf1 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -3,9 +3,9 @@ gpu_variant:
   - metal                    # [(osx and arm64)]
 #  - cuda-12                  # [(linux and x86_64)]
 c_compiler_version:      # [osx]
-  - 20                   # [osx]
+  - 17                   # [osx]
 cxx_compiler_version:    # [osx]
-  - 20                   # [osx]
+  - 17                   # [osx]
 # CONDA_BUILD_SYSROOT is defined in the base cbc.yaml, but it's reflected here so we can zip the keys and
 # build GPU and CPU at the same time for osx-arm64. It'll need to be manually updated here if the base cbc is changed.
 # This could be done using extend_keys instead, with a change to the base cbc.yaml.
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index e8ec5ced..b3c9a42f 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -95,7 +95,7 @@ requirements:
     - {{ stdlib('c') }}
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
-    - {{ compiler('cuda') }}                 # [(gpu_variant or "").startswith("cuda")]
+    - {{ compiler('cuda') }}                # [(gpu_variant or "").startswith("cuda")]
     - nvtx-c                                 # [cuda_compiler_version != "None" and build_platform != target_platform]
     {% if cuda_major >= 12 %}
     - cuda-driver-dev                        # [build_platform != target_platform]
@@ -119,6 +119,7 @@ requirements:
     # This has a strong run_export so we don't need to put it in `host` or `run`
     # We use llvm-openmp for openblas variants on osx.
     - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
+    - libcxx 17 
     - libuv     # [win]
     - cmake
     - ninja-base
@@ -291,6 +292,7 @@ outputs:
         # This has a strong run_export so we don't need to put it in `host` or `run`
         # We use llvm-openmp for openblas variants on osx.
         - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
+        - libcxx 17
         - cmake
         - ninja-base
         # Keep libprotobuf here so that a compatibile version

From 8bfcc2fd965540e5df9fdc2aef5dcc39f9da69a2 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Wed, 22 Oct 2025 16:25:25 +0100
Subject: [PATCH 28/32] osx only pin

---
 recipe/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index b3c9a42f..16ec8244 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -119,7 +119,7 @@ requirements:
     # This has a strong run_export so we don't need to put it in `host` or `run`
     # We use llvm-openmp for openblas variants on osx.
     - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
-    - libcxx 17 
+    - libcxx 17 # [osx] 
     - libuv     # [win]
     - cmake
     - ninja-base
@@ -292,7 +292,7 @@ outputs:
         # This has a strong run_export so we don't need to put it in `host` or `run`
         # We use llvm-openmp for openblas variants on osx.
         - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
-        - libcxx 17
+        - libcxx 17 # [osx]
         - cmake
         - ninja-base
         # Keep libprotobuf here so that a compatibile version

From 2a7da221135e06deda8bc4faea71e9282cd18310 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 23 Oct 2025 07:00:53 +0100
Subject: [PATCH 29/32] ensure libcxx in test

---
 recipe/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 16ec8244..7d82aa7d 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -415,6 +415,7 @@ outputs:
         - pybind11
         # the inductor "test_aoti_eager..." tests require objcopy
         - binutils  # [linux]
+        - libcxx 17 # [osx]
       imports:
         - torch
       source_files:

From ae84e7476770b8f298e8cd7d6379e10e860d16ff Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 23 Oct 2025 07:14:31 +0100
Subject: [PATCH 30/32] ensure formatting

---
 recipe/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 7d82aa7d..451f0f2b 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -189,8 +189,8 @@ requirements:
   # satisfy overlinking checks
   run:
     - {{ pin_compatible('intel-openmp') }}   # [blas_impl == "mkl"]
-    - libuv  # [win]
-    - {{ pin_compatible('magma') }}                       # [(gpu_variant or "").startswith("cuda")]
+    - libuv                                  # [win]
+    - {{ pin_compatible('magma') }}          # [(gpu_variant or "").startswith("cuda")]
 
 # these tests are for the libtorch output below, but due to
 # a particularity of conda-build, that output is defined in

From 89c1adfda605c36a97980cabc13815d20fc38489 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 23 Oct 2025 10:21:04 +0100
Subject: [PATCH 31/32] add abs for timeout

---
 abs.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/abs.yaml b/abs.yaml
index 0756ca38..1193e0c5 100644
--- a/abs.yaml
+++ b/abs.yaml
@@ -2,3 +2,5 @@
 # variant, so it's specified for both.
 extra_labels_for_os:
   osx-arm64: [ventura]
+
+task_timeout: 72000

From 73f8b23a21cfc5d79c6498b3c1c0d77ea8143b43 Mon Sep 17 00:00:00 2001
From: Jamie Robertson <jamierobertsongames@gmail.com>
Date: Thu, 30 Oct 2025 17:17:02 +0000
Subject: [PATCH 32/32] remove livcxx17

---
 recipe/meta.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 451f0f2b..64fe1708 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -119,7 +119,6 @@ requirements:
     # This has a strong run_export so we don't need to put it in `host` or `run`
     # We use llvm-openmp for openblas variants on osx.
     - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
-    - libcxx 17 # [osx] 
     - libuv     # [win]
     - cmake
     - ninja-base
@@ -292,7 +291,6 @@ outputs:
         # This has a strong run_export so we don't need to put it in `host` or `run`
         # We use llvm-openmp for openblas variants on osx.
         - llvm-openmp 17              # [osx and not (blas_impl == "mkl")]
-        - libcxx 17 # [osx]
         - cmake
         - ninja-base
         # Keep libprotobuf here so that a compatibile version
@@ -415,7 +413,6 @@ outputs:
         - pybind11
         # the inductor "test_aoti_eager..." tests require objcopy
         - binutils  # [linux]
-        - libcxx 17 # [osx]
       imports:
         - torch
       source_files: