diff --git a/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_h2ab6b548.yaml b/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_h2ab6b548.yaml
new file mode 100644
index 00000000..47b5e0a9
--- /dev/null
+++ b/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_h2ab6b548.yaml
@@ -0,0 +1,82 @@
+blas_impl:
+- generic
+c_compiler:
+- gcc
+c_compiler_version:
+- '13'
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- None
+cuda_compiler_version:
+- None
+cudnn:
+- '9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+github_actions_labels:
+- cirun-openstack-cpu-2xlarge
+is_rc:
+- 'False'
+libabseil:
+- '20240722'
+libblas:
+- 3.9.* *netlib
+libcblas:
+- 3.9.* *netlib
+liblapack:
+- 3.9.* *netlib
+libprotobuf:
+- 5.28.3
+libtorch:
+- '2.4'
+megabuild:
+- 'true'
+mkl:
+- '2024'
+nccl:
+- '2'
+numpy:
+- '2.0'
+- '2.0'
+- '2.0'
+- '2'
+- '2.0'
+orc:
+- 2.0.3
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.9.* *_cpython
+pytorch:
+- '2.4'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+  - cuda_compiler
+  - cuda_compiler_version
+  - docker_image
+- - channel_targets
+  - is_rc
+- - python
+  - numpy
diff --git a/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_h6173e25b.yaml
similarity index 100%
rename from .ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_h6173e25b.yaml
diff --git a/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xla_hf94d7e11.yaml b/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xla_hf94d7e11.yaml
new file mode 100644
index 00000000..89ac1efe
--- /dev/null
+++ b/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xla_hf94d7e11.yaml
@@ -0,0 +1,82 @@
+blas_impl:
+- generic
+c_compiler:
+- gcc
+c_compiler_version:
+- '13'
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.6'
+cudnn:
+- '9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+github_actions_labels:
+- cirun-openstack-cpu-2xlarge
+is_rc:
+- 'False'
+libabseil:
+- '20240722'
+libblas:
+- 3.9.* *netlib
+libcblas:
+- 3.9.* *netlib
+liblapack:
+- 3.9.* *netlib
+libprotobuf:
+- 5.28.3
+libtorch:
+- '2.4'
+megabuild:
+- 'true'
+mkl:
+- '2024'
+nccl:
+- '2'
+numpy:
+- '2.0'
+- '2.0'
+- '2.0'
+- '2'
+- '2.0'
+orc:
+- 2.0.3
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.9.* *_cpython
+pytorch:
+- '2.4'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+  - cuda_compiler
+  - cuda_compiler_version
+  - docker_image
+- - channel_targets
+  - is_rc
+- - python
+  - numpy
diff --git a/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xla_h2b83127f.yaml
similarity index 100%
rename from .ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xla_h2b83127f.yaml
diff --git a/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcF_he048d8d8.yaml b/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcF_he048d8d8.yaml
new file mode 100644
index 00000000..475c5ca5
--- /dev/null
+++ b/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcF_he048d8d8.yaml
@@ -0,0 +1,82 @@
+blas_impl:
+- mkl
+c_compiler:
+- gcc
+c_compiler_version:
+- '13'
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- None
+cuda_compiler_version:
+- None
+cudnn:
+- '9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+github_actions_labels:
+- cirun-openstack-cpu-2xlarge
+is_rc:
+- 'False'
+libabseil:
+- '20240722'
+libblas:
+- 3.9.* *netlib
+libcblas:
+- 3.9.* *netlib
+liblapack:
+- 3.9.* *netlib
+libprotobuf:
+- 5.28.3
+libtorch:
+- '2.4'
+megabuild:
+- 'true'
+mkl:
+- '2024'
+nccl:
+- '2'
+numpy:
+- '2.0'
+- '2.0'
+- '2.0'
+- '2'
+- '2.0'
+orc:
+- 2.0.3
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.9.* *_cpython
+pytorch:
+- '2.4'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+  - cuda_compiler
+  - cuda_compiler_version
+  - docker_image
+- - channel_targets
+  - is_rc
+- - python
+  - numpy
diff --git a/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcF_h3782e28d.yaml
similarity index 100%
rename from .ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcF_h3782e28d.yaml
diff --git a/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargei_h9d03fa1b.yaml b/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargei_h9d03fa1b.yaml
new file mode 100644
index 00000000..60222233
--- /dev/null
+++ b/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargei_h9d03fa1b.yaml
@@ -0,0 +1,82 @@
+blas_impl:
+- mkl
+c_compiler:
+- gcc
+c_compiler_version:
+- '13'
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.6'
+cudnn:
+- '9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+github_actions_labels:
+- cirun-openstack-cpu-2xlarge
+is_rc:
+- 'False'
+libabseil:
+- '20240722'
+libblas:
+- 3.9.* *netlib
+libcblas:
+- 3.9.* *netlib
+liblapack:
+- 3.9.* *netlib
+libprotobuf:
+- 5.28.3
+libtorch:
+- '2.4'
+megabuild:
+- 'true'
+mkl:
+- '2024'
+nccl:
+- '2'
+numpy:
+- '2.0'
+- '2.0'
+- '2.0'
+- '2'
+- '2.0'
+orc:
+- 2.0.3
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.9.* *_cpython
+pytorch:
+- '2.4'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+  - cuda_compiler
+  - cuda_compiler_version
+  - docker_image
+- - channel_targets
+  - is_rc
+- - python
+  - numpy
diff --git a/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargei_h0e8a3be7.yaml
similarity index 100%
rename from .ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargei_h0e8a3be7.yaml
diff --git a/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFalse_hb5deabbb.yaml b/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFalse_hb5deabbb.yaml
new file mode 100644
index 00000000..692646e2
--- /dev/null
+++ b/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFalse_hb5deabbb.yaml
@@ -0,0 +1,82 @@
+blas_impl:
+- generic
+c_compiler:
+- gcc
+c_compiler_version:
+- '13'
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- None
+cuda_compiler_version:
+- None
+cudnn:
+- '9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+github_actions_labels:
+- cirun-openstack-cpu-2xlarge
+is_rc:
+- 'False'
+libabseil:
+- '20240722'
+libblas:
+- 3.9.* *netlib
+libcblas:
+- 3.9.* *netlib
+liblapack:
+- 3.9.* *netlib
+libprotobuf:
+- 5.28.3
+libtorch:
+- '2.4'
+megabuild:
+- 'true'
+mkl:
+- '2024'
+nccl:
+- '2'
+numpy:
+- '2.0'
+- '2.0'
+- '2.0'
+- '2'
+- '2.0'
+orc:
+- 2.0.3
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.9.* *_cpython
+pytorch:
+- '2.4'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+  - cuda_compiler
+  - cuda_compiler_version
+  - docker_image
+- - channel_targets
+  - is_rc
+- - python
+  - numpy
diff --git a/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFalse_h95f6c8a0.yaml
similarity index 100%
rename from .ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFalse_h95f6c8a0.yaml
diff --git a/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFal_h83318869.yaml b/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFal_h83318869.yaml
new file mode 100644
index 00000000..2059e9f4
--- /dev/null
+++ b/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFal_h83318869.yaml
@@ -0,0 +1,82 @@
+blas_impl:
+- generic
+c_compiler:
+- gcc
+c_compiler_version:
+- '13'
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.6'
+cudnn:
+- '9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+github_actions_labels:
+- cirun-openstack-cpu-2xlarge
+is_rc:
+- 'False'
+libabseil:
+- '20240722'
+libblas:
+- 3.9.* *netlib
+libcblas:
+- 3.9.* *netlib
+liblapack:
+- 3.9.* *netlib
+libprotobuf:
+- 5.28.3
+libtorch:
+- '2.4'
+megabuild:
+- 'true'
+mkl:
+- '2024'
+nccl:
+- '2'
+numpy:
+- '2.0'
+- '2.0'
+- '2.0'
+- '2'
+- '2.0'
+orc:
+- 2.0.3
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.9.* *_cpython
+pytorch:
+- '2.4'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+  - cuda_compiler
+  - cuda_compiler_version
+  - docker_image
+- - channel_targets
+  - is_rc
+- - python
+  - numpy
diff --git a/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFal_haf4427a0.yaml
similarity index 100%
rename from .ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFal_haf4427a0.yaml
diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml
index 40a005a9..12ddf873 100644
--- a/.github/workflows/conda-build.yml
+++ b/.github/workflows/conda-build.yml
@@ -21,50 +21,80 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - CONFIG: linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse
+          - CONFIG: linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_h2ab6b548
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericc_compiler_hb59e174a3e', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericc_compiler_vers_h2ab6b548', 'linux', 'x64', 'self-hosted']
+            DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+          - CONFIG: linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_h6173e25b
+            UPLOAD_PACKAGES: True
+            os: ubuntu
+            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericc_compiler_vers_h6173e25b', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all"
-          - CONFIG: linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse
+          - CONFIG: linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xla_hf94d7e11
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericc_compiler_h5e2cde8be1', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericc_compiler_vers_hf94d7e11', 'linux', 'x64', 'self-hosted']
+            DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+          - CONFIG: linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xla_h2b83127f
+            UPLOAD_PACKAGES: True
+            os: ubuntu
+            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericc_compiler_vers_h2b83127f', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all"
-          - CONFIG: linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse
+          - CONFIG: linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcF_he048d8d8
+            UPLOAD_PACKAGES: True
+            os: ubuntu
+            runs_on: ['cirun-openstack-cpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklc_compiler_version1_he048d8d8', 'linux', 'x64', 'self-hosted']
+            DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+          - CONFIG: linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcF_h3782e28d
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklc_compiler_ver_h0b96eb68c6', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklc_compiler_version1_h3782e28d', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all"
-          - CONFIG: linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse
+          - CONFIG: linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargei_h9d03fa1b
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklc_compiler_ver_hc39dedf959', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklc_compiler_version1_h9d03fa1b', 'linux', 'x64', 'self-hosted']
+            DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+          - CONFIG: linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargei_h0e8a3be7
+            UPLOAD_PACKAGES: True
+            os: ubuntu
+            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklc_compiler_version1_h0e8a3be7', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all"
-          - CONFIG: linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse
+          - CONFIG: linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFalse_hb5deabbb
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version13c_h352ed12235', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-2xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version13channe_hb5deabbb', 'linux', 'x64', 'self-hosted']
+            DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+          - CONFIG: linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFalse_h95f6c8a0
+            UPLOAD_PACKAGES: True
+            os: ubuntu
+            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version13channe_h95f6c8a0', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all"
-          - CONFIG: linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse
+          - CONFIG: linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFal_h83318869
+            UPLOAD_PACKAGES: True
+            os: ubuntu
+            runs_on: ['cirun-openstack-cpu-2xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version13channe_h83318869', 'linux', 'x64', 'self-hosted']
+            DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+          - CONFIG: linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFal_haf4427a0
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version13c_h94771c6c09', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version13channe_haf4427a0', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all"
           - CONFIG: win_64_channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNoneis_rcFalse
             UPLOAD_PACKAGES: True
             os: windows
-            runs_on: ['cirun-azure-windows-2xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_m_h0b759d4e63', 'windows', 'x64', 'self-hosted']
+            runs_on: ['cirun-azure-windows-2xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_maincu_h0b759d4e', 'windows', 'x64', 'self-hosted']
           - CONFIG: win_64_channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6is_rcFalse
             UPLOAD_PACKAGES: True
             os: windows
-            runs_on: ['cirun-azure-windows-2xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_m_h790f30616b', 'windows', 'x64', 'self-hosted']
+            runs_on: ['cirun-azure-windows-2xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_maincu_h790f3061', 'windows', 'x64', 'self-hosted']
     steps:
 
     - name: Checkout code
diff --git a/README.md b/README.md
index 22e879dc..c0e4e5e2 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,14 @@ Summary: PyTorch is an optimized tensor library for deep learning using GPUs and
 
 Development: https://github.com/pytorch/pytorch
 
+Documentation: https://pytorch.org/docs/
+
+PyTorch is a Python package that provides two high-level features:
+  - Tensor computation (like NumPy) with strong GPU acceleration
+  - Deep neural networks built on a tape-based autograd system
+
+You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed.
+
 Current build status
 ====================
 
@@ -29,45 +37,87 @@ Current build status
         
           | Variant | Status | 
|---|
           
-              | linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse+ | linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_h2ab6b548+ | +                
+ + +                
+ | 
+              | linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_h6173e25b+ | +                
+ + +                
+ | 
+              | linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xla_hf94d7e11+ | +                
+ + +                
+ | 
+              | linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xla_h2b83127f+ | +                
+ + +                
+ | 
+              | linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcF_he048d8d8+ | +                
+ + +                
+ | 
+              | linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcF_h3782e28d+ | +                
+ + +                
+ | 
+              | linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargei_h9d03fa1b | -  +   | 
-              | linux_64_blas_implgenericc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse+ | linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargei_h0e8a3be7 | -  +   | 
-              | linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse+ | linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFalse_hb5deabbb | -  +   | 
-              | linux_64_blas_implmklc_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse+ | linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFalse_h95f6c8a0 | -  +   | 
-              | linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13is_rcFalse+ | linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-cpu-2xlargeis_rcFal_h83318869 | -  +   | 
-              | linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13is_rcFalse+ | linux_aarch64_c_compiler_version13channel_targetsconda-forge_maincuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13github_actions_labelscirun-openstack-gpu-2xlargeis_rcFal_haf4427a0 | -  +   | 
diff --git a/recipe/bld.bat b/recipe/bld.bat
index e4d0bae5..c4775d00 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -7,6 +7,12 @@ if EXIST pyproject.toml (
   if %ERRORLEVEL% neq 0 exit 1
 )
 
+@REM The PyTorch test suite includes some symlinks, which aren't resolved on Windows, leading to packaging errors.
+@REM ATTN! These change and have to be updated manually, often with each release.
+@REM (No symlinks are currently being packaged. This note is kept because the issue took some months to find. Look out
+@REM for a failure with the error message: "conda_package_handling.exceptions.ArchiveCreationError:  Cannot stat
+@REM while writing file".)
+
 set PYTORCH_BUILD_VERSION=%PKG_VERSION%
 @REM Always pass 0 to avoid appending ".post" to version string.
 @REM https://github.com/conda-forge/pytorch-cpu-feedstock/issues/315
@@ -97,6 +103,10 @@ if not "%cuda_compiler_version%" == "None" (
 
 set DISTUTILS_USE_SDK=1
 
+@REM Use our Pybind11, Eigen
+set USE_SYSTEM_PYBIND11=1
+set USE_SYSTEM_EIGEN_INSTALL=1
+
 set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include
 set LIB=%LIBRARY_PREFIX%\lib;%LIB%
 
@@ -183,13 +193,6 @@ if "%PKG_NAME%" == "libtorch" (
     pushd torch-%PKG_VERSION%
     if %ERRORLEVEL% neq 0 exit 1
 
-    @REM Do not package `fmt.lib` (and its metadata); delete it before the move into
-    @REM %LIBRARY_BIN% because it may exist in host before installation already
-    del torch\lib\fmt.lib torch\lib\pkgconfig\fmt.pc
-    if %ERRORLEVEL% neq 0 exit 1
-    @REM also delete rest of fmt metadata
-    rmdir /s /q torch\lib\cmake\fmt
-
     @REM Move the binaries into the packages site-package directory
     @REM the only content of torch\bin, {asmjit,fbgemm}.dll, also exists in torch\lib
     robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_BIN%\ torch*.dll c10.dll shm.dll asmjit.dll fbgemm.dll
diff --git a/recipe/build.sh b/recipe/build.sh
index 57044b09..2ed2f511 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -4,6 +4,10 @@ echo "=== Building ${PKG_NAME} (py: ${PY_VER}) ==="
 
 set -ex
 
+echo "####################################################################"
+echo "Building PyTorch using BLAS implementation: $blas_impl              "
+echo "####################################################################"
+
 # This is used to detect if it's in the process of building pytorch
 export IN_PYTORCH_BUILD=1
 
@@ -20,9 +24,21 @@ rm -rf pyproject.toml
 export USE_CUFILE=0
 export USE_NUMA=0
 export USE_ITT=0
+
+#################### ADJUST COMPILER AND LINKER FLAGS #####################
+# Pytorch's build system doesn't like us setting the c++ standard and will
+# issue a warning:
+# https://github.com/pytorch/pytorch/blob/3beb7006dd5a415dfa236081ad5d55ae38346324/CMakeLists.txt#L41
+export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-std=c++[0-9][0-9]//g')"
+# The below three lines expose symbols that would otherwise be hidden or
+# optimised away. They were here before, so removing them would potentially
+# break users' programs
 export CFLAGS="$(echo $CFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
 export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
 export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,--as-needed//g')"
+# The default conda LDFLAGS include -Wl,-dead_strip_dylibs, which removes all the
+# MKL sequential, core, etc. libraries, resulting in a "Symbol not found: _mkl_blas_caxpy"
+# error on osx-64.
 export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,-dead_strip_dylibs//g')"
 export LDFLAGS_LD="$(echo $LDFLAGS_LD | sed 's/-dead_strip_dylibs//g')"
 if [[ "$c_compiler" == "clang" ]]; then
@@ -45,6 +61,7 @@ fi
 # can be imported on system without a GPU
 LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}"
 
+################ CONFIGURE CMAKE FOR CONDA ENVIRONMENT ###################
 export CMAKE_GENERATOR=Ninja
 export CMAKE_LIBRARY_PATH=$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH
 export CMAKE_PREFIX_PATH=$PREFIX
@@ -73,6 +90,8 @@ export USE_SYSTEM_SLEEF=1
 # use our protobuf
 export BUILD_CUSTOM_PROTOBUF=OFF
 rm -rf $PREFIX/bin/protoc
+export USE_SYSTEM_PYBIND11=1
+export USE_SYSTEM_EIGEN_INSTALL=1
 
 # prevent six from being downloaded
 > third_party/NNPACK/cmake/DownloadSix.cmake
@@ -98,16 +117,23 @@ if [[ "${CI}" == "github_actions" ]]; then
     # reduce parallelism to avoid getting OOM-killed on
     # cirun-openstack-gpu-2xlarge, which has 32GB RAM, 8 CPUs
     export MAX_JOBS=4
-else
+elif [[ "${CI}" == "azure" ]]; then
     export MAX_JOBS=${CPU_COUNT}
+else
+    # Leave a spare core for other tasks, per common practice.
+    # Reducing further can help with out-of-memory errors.
+    export MAX_JOBS=$((CPU_COUNT > 1 ? CPU_COUNT - 1 : 1))
 fi
 
 if [[ "$blas_impl" == "generic" ]]; then
     # Fake openblas
     export BLAS=OpenBLAS
     export OpenBLAS_HOME=${PREFIX}
-else
+elif [[ "$blas_impl" == "mkl" ]]; then
     export BLAS=MKL
+else
+    echo "[ERROR] Unsupported BLAS implementation '${blas_impl}'" >&2
+    exit 1
 fi
 
 if [[ "$PKG_NAME" == "pytorch" ]]; then
@@ -163,12 +189,14 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
             echo "unknown CUDA arch, edit build.sh"
             exit 1
     esac
+
+    # When updating this list, consult the compatibility matrix: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
     case ${cuda_compiler_version} in
-        12.6)
-            export TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
+        12.[0-6])
+            export CMAKE_ARGS="${CMAKE_ARGS} -DTORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
             ;;
         *)
-            echo "unsupported cuda version. edit build.sh"
+            echo "No CUDA architecture list exists for CUDA v${cuda_compiler_version}. See build.sh for information on adding one."
             exit 1
     esac
     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
@@ -203,7 +231,8 @@ case ${PKG_NAME} in
 
     mv build/lib.*/torch/bin/* ${PREFIX}/bin/
     mv build/lib.*/torch/lib/* ${PREFIX}/lib/
-    mv build/lib.*/torch/share/* ${PREFIX}/share/
+    # need to merge these now because we're using system pybind11, meaning the destination directory is not empty
+    rsync -a build/lib.*/torch/share/* ${PREFIX}/share/
     mv build/lib.*/torch/include/{ATen,caffe2,tensorpipe,torch,c10} ${PREFIX}/include/
     rm ${PREFIX}/lib/libtorch_python.*
 
@@ -211,7 +240,7 @@ case ${PKG_NAME} in
     cp build/CMakeCache.txt build/CMakeCache.txt.orig
     ;;
   pytorch)
-    $PREFIX/bin/python -m pip install . --no-deps -vvv --no-clean \
+    $PREFIX/bin/python -m pip install . --no-deps --no-build-isolation -vvv --no-clean \
         | sed "s,${CXX},\$\{CXX\},g" \
         | sed "s,${PREFIX},\$\{PREFIX\},g"
     # Keep this in ${PREFIX}/lib so that the library can be found by
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index a0c8d801..a7430233 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -23,6 +23,7 @@ blas_impl:
 # https://github.com/conda-forge/.cirun
 github_actions_labels:          # [linux or win]
 - cirun-openstack-gpu-2xlarge   # [linux]
+- cirun-openstack-cpu-2xlarge   # [linux]
 - cirun-azure-windows-2xlarge   # [win]
 
 megabuild:
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index d5fc48f5..60e0945e 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,7 +1,10 @@
 # if you wish to build release candidate number X, append the version string with ".rcX"
 {% set version = "2.5.1" %}
-{% set build = 10 %}
+{% set build = 11 %}
 
+# Use a higher build number for the CUDA variant so that conda's
+# solver prefers it and installs it wherever the platform
+# supports it.
 {% if cuda_compiler_version != "None" %}
 {% set build = build + 200 %}
 {% endif %}
@@ -64,6 +67,7 @@ source:
     - patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
     # point to headers that are now living in $PREFIX/include instead of $SP_DIR/torch/include
     - patches/0016-point-include-paths-to-PREFIX-include.patch
+    - patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
 
 build:
   number: {{ build }}
@@ -77,6 +81,10 @@ build:
 {% else %}
   skip: true  # [is_rc]
 {% endif %}
+  # CUDA builds on GPU agents, non-CUDA builds on CPU agents; these gymnastics would be avoidable with
+  # https://github.com/conda-forge/conda-forge-pinning-feedstock/pull/6910, but that didn't find consensus
+  skip: true  # [cuda_compiler_version == "None" and github_actions_labels == "cirun-openstack-gpu-2xlarge"]
+  skip: true  # [cuda_compiler_version != "None" and github_actions_labels == "cirun-openstack-cpu-2xlarge"]
   string: cuda{{ cuda_compiler_version | replace('.', '') }}_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version != "None"]
   string: cpu_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}                                                 # [cuda_compiler_version == "None"]
   detect_binary_files_with_prefix: false
@@ -117,6 +125,7 @@ requirements:
     - protobuf
     - make      # [linux]
     - sccache   # [win]
+    - rsync     # [unix]
   host:
     # GPU requirements
     - cudnn                           # [cuda_compiler_version != "None"]
@@ -167,6 +176,8 @@ requirements:
     - libuv
     - pkg-config  # [unix]
     - typing_extensions
+    - pybind11
+    - eigen
   run:
     # GPU requirements without run_exports
     - {{ pin_compatible('cudnn') }}                       # [cuda_compiler_version != "None"]
@@ -299,6 +310,8 @@ outputs:
         - pkg-config  # [unix]
         - typing_extensions
         - {{ pin_subpackage('libtorch', exact=True) }}
+        - pybind11
+        - eigen
       run:
         - llvm-openmp    # [osx]
         - intel-openmp {{ mkl }}  # [win]
@@ -314,6 +327,7 @@ outputs:
         - filelock
         - jinja2
         - networkx
+        - pybind11
         - nomkl                 # [blas_impl != "mkl"]
         - fsspec
         # avoid that people without GPUs needlessly download ~0.5-1GB
@@ -360,6 +374,7 @@ outputs:
         # tools/ is needed to optimise test run
         # as of pytorch=2.0.0, there is a bug when trying to run tests without the tools
         - tools
+        #- .ci/pytorch/smoke_test/smoke_test.py
       commands:
         # Run pip check so as to ensure that all pytorch packages are installed
         # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/24
@@ -367,6 +382,15 @@ outputs:
         - python -c "import torch; print(torch.__version__)"
         - python -c "import torch; assert torch.backends.mkldnn.m.is_available()"  # [x86 and cuda_compiler_version == "None"]
         - python -c "import torch; torch.tensor(1).to('cpu').numpy(); print('numpy support enabled!!!')"
+        # We have had issues with openmp .dylibs being doubly loaded in certain cases. These two tests catch the (observed) issue
+        - python -c "import torch; import numpy"
+        - python -c "import numpy; import torch"
+        # distributed support is enabled by default on linux; for mac, we enable it manually in build.sh
+        - python -c "import torch; assert torch.distributed.is_available()"        # [linux or osx]
+        - python -c "import torch; assert torch.backends.cuda.is_built()"          # [(cuda_compiler_version != "None")]
+        - python -c "import torch; assert torch.backends.cudnn.is_available()"     # [(cuda_compiler_version != "None")]
+        - python -c "import torch; assert torch.cuda.is_available()"               # [(cuda_compiler_version != "None")]
+        - python -c "import torch; assert torch.backends.cudnn.enabled"            # [(cuda_compiler_version != "None")]
         # At conda-forge, we target versions of OSX that are too old for MPS support
         # But if users install a newer version of OSX, they will have MPS support
         # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/123#issuecomment-1186355073
@@ -377,8 +401,32 @@ outputs:
         - if not exist %LIBRARY_BIN%\torch_python.dll exit 1  # [win]
         - if not exist %LIBRARY_LIB%\torch_python.lib exit 1  # [win]
 
+        # See here for environment variables needed by the smoke test script
+        # https://github.com/pytorch/pytorch/blob/266fd35c5842902f6304aa8e7713b252cbfb243c/.ci/pytorch/smoke_test/smoke_test.py#L16
+        - set MATRIX_GPU_ARCH_VERSION="{{ '.'.join((cuda_compiler_version or "").split('.')[:2]) }}"   # [(cuda_compiler_version != "None") and (win)]
+        - set MATRIX_GPU_ARCH_TYPE="cuda"                                                       # [(cuda_compiler_version != "None") and (win)]
+        - set MATRIX_GPU_ARCH_VERSION="none"                                                    # [(cuda_compiler_version == "None") and (win)]
+        - set MATRIX_GPU_ARCH_TYPE="none"                                                       # [(cuda_compiler_version == "None") and (win)]
+        - set MATRIX_CHANNEL="defaults"                                                         # [win]
+        - set MATRIX_STABLE_VERSION={{ version }}                                               # [win]
+        - set MATRIX_PACKAGE_TYPE="conda"                                                       # [win]
+        - set TARGET_OS="windows"                                                               # [win]
+        - set OMP_NUM_THREADS=4                                                                 # [win]
+        - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join((cuda_compiler_version or "").split('.')[:2]) }}"  # [(cuda_compiler_version != "None") and (linux and x86_64)]
+        - export MATRIX_GPU_ARCH_TYPE="cuda"                                                    # [(cuda_compiler_version != "None") and (linux and x86_64)]
+        - export MATRIX_GPU_ARCH_VERSION="none"                                                 # [(cuda_compiler_version == "None") and (not win)]
+        - export MATRIX_GPU_ARCH_TYPE="none"                                                    # [(cuda_compiler_version == "None") and (not win)]
+        - export MATRIX_CHANNEL="defaults"                                                      # [not win]
+        - export MATRIX_STABLE_VERSION="{{ version }}"                                          # [not win]
+        - export MATRIX_PACKAGE_TYPE="conda"                                                    # [not win]
+        - export TARGET_OS="linux"                                                              # [linux]
+        - export TARGET_OS="macos-arm64"                                                        # [(osx and arm64)]
+        - export TARGET_OS="macos-x86_64"                                                       # [(osx and x86_64)]
+        - export OMP_NUM_THREADS=4                                                              # [not win]
+        #- python ./smoke_test/smoke_test.py --package torchonly
+
         # a reasonably safe subset of tests that should run under 15 minutes
-        # disable hypothesis because it randomly yields health check errors
+        # The inductor tests test torch.compile
         {% set tests = " ".join([
             "test/test_autograd.py",
             "test/test_autograd_fallback.py",
@@ -389,8 +437,7 @@ outputs:
             "test/test_nn.py",
             "test/test_torch.py",
             "test/test_xnnpack_integration.py",
-            "-m \"not hypothesis\"",
-        ]) %}
+        ] + (cuda_compiler_version != "None") * ["test/inductor/test_torchinductor.py"]) %}
 
         {% set skips = "(TestTorch and test_print)" %}
         # tolerance violation with openblas
@@ -438,8 +485,9 @@ outputs:
         # for potential packaging problems by running a fixed subset
         - export OMP_NUM_THREADS=4  # [unix]
         # reduced paralellism to avoid OOM; test only one python version on aarch because emulation is super-slow
-        - python -m pytest -n 2 {{ tests }} -k "not ({{ skips }})" --durations=50   # [unix and (not aarch64 or py==312)]
-        - python -m pytest -v -s {{ tests }} -k "not ({{ skips }})" --durations=50  # [win]
+        # disable hypothesis because it randomly yields health check errors
+        - python -m pytest -n 2 {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50   # [unix and (not aarch64 or py==312)]
+        - python -m pytest -v -s {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50  # [win]
 
         # regression test for https://github.com/conda-forge/pytorch-cpu-feedstock/issues/329, where we picked up
         # duplicate `.pyc` files due to newest py-ver (3.13) in the build environment not matching the one in host;
@@ -479,8 +527,13 @@ about:
   license_file:
     - LICENSE
     - NOTICE
-    - third_party/pybind11/LICENSE
   summary: PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
+  description: |
+    PyTorch is a Python package that provides two high-level features:
+      - Tensor computation (like NumPy) with strong GPU acceleration
+      - Deep neural networks built on a tape-based autograd system
+    You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed.
+  doc_url: https://pytorch.org/docs/
 
 extra:
   recipe-maintainers:
diff --git a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
index 4ce6492a..9261e825 100644
--- a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
+++ b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
@@ -1,14 +1,14 @@
-From 756045fca376345e48afb6a868b502dbfa0c584c Mon Sep 17 00:00:00 2001
+From f3a0f9aab6dce56eea590b946f60256014b61bf7 Mon Sep 17 00:00:00 2001
 From: Mark Harfouche 
 Date: Sun, 1 Sep 2024 17:35:40 -0400
-Subject: [PATCH 01/16] Force usage of python 3 and error without numpy
+Subject: [PATCH 01/17] Force usage of python 3 and error without numpy
 
 ---
  cmake/Dependencies.cmake | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)
 
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index e78305e0a..15c625486 100644
+index e78305e0a8e..15c62548601 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
 @@ -861,9 +861,9 @@ if(BUILD_PYTHON)
@@ -32,6 +32,3 @@ index e78305e0a..15c625486 100644
          caffe2_update_option(USE_NUMPY OFF)
        else()
          caffe2_update_option(USE_NUMPY ON)
--- 
-2.48.1
-
diff --git a/recipe/patches/0002-Help-find-numpy.patch b/recipe/patches/0002-Help-find-numpy.patch
index 6f3fa2c3..ff94df90 100644
--- a/recipe/patches/0002-Help-find-numpy.patch
+++ b/recipe/patches/0002-Help-find-numpy.patch
@@ -1,14 +1,14 @@
-From 70661ad52cb2f0290de3e0758f240560e4b1e769 Mon Sep 17 00:00:00 2001
+From 21c30036b5b86f403c0cf4426165d9a6a50edb1a Mon Sep 17 00:00:00 2001
 From: Mark Harfouche 
 Date: Tue, 1 Oct 2024 00:28:40 -0400
-Subject: [PATCH 02/16] Help find numpy
+Subject: [PATCH 02/17] Help find numpy
 
 ---
  tools/setup_helpers/cmake.py | 6 ++++++
  1 file changed, 6 insertions(+)
 
 diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py
-index 4b605fe59..bde41323c 100644
+index 4b605fe5975..bde41323c76 100644
 --- a/tools/setup_helpers/cmake.py
 +++ b/tools/setup_helpers/cmake.py
 @@ -305,9 +305,15 @@ class CMake:
@@ -27,6 +27,3 @@ index 4b605fe59..bde41323c 100644
              TORCH_BUILD_VERSION=version,
              **build_options,
          )
--- 
-2.48.1
-
diff --git a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
index af8662e4..65101763 100644
--- a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
+++ b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch
@@ -1,7 +1,7 @@
-From 4ae61d17c81e9d66e091c2790ac6deae6bf31204 Mon Sep 17 00:00:00 2001
+From d1826af525db41eda5020a1404f5d5521d67a5dc Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee 
 Date: Sat, 19 Oct 2024 04:26:01 +0000
-Subject: [PATCH 03/16] Add USE_SYSTEM_NVTX option (#138287)
+Subject: [PATCH 03/17] Add USE_SYSTEM_NVTX option (#138287)
 
 ## Summary
 
@@ -21,7 +21,7 @@ Approved by: https://github.com/albanD
  3 files changed, 22 insertions(+), 2 deletions(-)
 
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 98593c2de..ae3c3f2cb 100644
+index 98593c2de97..ae3c3f2cbd5 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
 @@ -470,6 +470,7 @@ option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." OFF)
@@ -41,7 +41,7 @@ index 98593c2de..ae3c3f2cb 100644
  
  # /Z7 override option When generating debug symbols, CMake default to use the
 diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
-index afc1bc12a..152fbdbe6 100644
+index afc1bc12abf..152fbdbe6dd 100644
 --- a/cmake/public/cuda.cmake
 +++ b/cmake/public/cuda.cmake
 @@ -170,7 +170,11 @@ else()
@@ -58,7 +58,7 @@ index afc1bc12a..152fbdbe6 100644
  if(nvtx3_FOUND)
    add_library(torch::nvtx3 INTERFACE IMPORTED)
 diff --git a/setup.py b/setup.py
-index 2b0cfa99d..7174777ed 100644
+index 2b0cfa99d71..7174777ed4e 100644
 --- a/setup.py
 +++ b/setup.py
 @@ -183,7 +183,21 @@
@@ -84,6 +84,3 @@ index 2b0cfa99d..7174777ed 100644
  #
  #   USE_MIMALLOC
  #      Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
--- 
-2.48.1
-
diff --git a/recipe/patches/0004-Update-sympy-version.patch b/recipe/patches/0004-Update-sympy-version.patch
index 5dd72f7c..eda58511 100644
--- a/recipe/patches/0004-Update-sympy-version.patch
+++ b/recipe/patches/0004-Update-sympy-version.patch
@@ -1,14 +1,14 @@
-From 2c6db02c01ad080c8dc8ae0b78be2b93099c2ac8 Mon Sep 17 00:00:00 2001
+From e3219c5fe8834753b0cf9e92be4d1ef1e874f370 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee 
 Date: Thu, 17 Oct 2024 15:04:05 -0700
-Subject: [PATCH 04/16] Update sympy version
+Subject: [PATCH 04/17] Update sympy version
 
 ---
  setup.py | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/setup.py b/setup.py
-index 7174777ed..65be34e39 100644
+index 7174777ed4e..65be34e39b1 100644
 --- a/setup.py
 +++ b/setup.py
 @@ -1158,7 +1158,7 @@ def main():
@@ -20,6 +20,3 @@ index 7174777ed..65be34e39 100644
          "networkx",
          "jinja2",
          "fsspec",
--- 
-2.48.1
-
diff --git a/recipe/patches/0005-Fix-duplicate-linker-script.patch b/recipe/patches/0005-Fix-duplicate-linker-script.patch
index 7cc82435..6ab800fe 100644
--- a/recipe/patches/0005-Fix-duplicate-linker-script.patch
+++ b/recipe/patches/0005-Fix-duplicate-linker-script.patch
@@ -1,14 +1,14 @@
-From fa5bb8f1acd0195efadc35c8fbb9199be92932d9 Mon Sep 17 00:00:00 2001
+From 08a1f44fbc81324aa98d720dfb7b87a261923ac2 Mon Sep 17 00:00:00 2001
 From: Jeongseok Lee 
 Date: Sun, 3 Nov 2024 01:12:36 -0700
-Subject: [PATCH 05/16] Fix duplicate linker script
+Subject: [PATCH 05/17] Fix duplicate linker script
 
 ---
  setup.py | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/setup.py b/setup.py
-index 65be34e39..b0e01e0d1 100644
+index 65be34e39b1..b0e01e0d1ee 100644
 --- a/setup.py
 +++ b/setup.py
 @@ -1184,7 +1184,9 @@ def main():
@@ -22,6 +22,3 @@ index 65be34e39..b0e01e0d1 100644
          os.environ["CFLAGS"] = (
              os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
          )
--- 
-2.48.1
-
diff --git a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
index cddb8b68..ea928c2d 100644
--- a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
+++ b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch
@@ -1,7 +1,7 @@
-From 6fc695312cd062e13c2482b52ae8d028bd7c043a Mon Sep 17 00:00:00 2001
+From 15df314a41c69a31c0443254d5552aa1b39d708d Mon Sep 17 00:00:00 2001
 From: William Wen 
 Date: Fri, 13 Sep 2024 13:02:33 -0700
-Subject: [PATCH 06/16] fix 3.13 pickle error in serialization.py (#136034)
+Subject: [PATCH 06/17] fix 3.13 pickle error in serialization.py (#136034)
 
 Error encountered when adding dynamo 3.13 support.
 Pull Request resolved: https://github.com/pytorch/pytorch/pull/136034
@@ -11,7 +11,7 @@ Approved by: https://github.com/albanD
  1 file changed, 12 insertions(+), 4 deletions(-)
 
 diff --git a/torch/serialization.py b/torch/serialization.py
-index d936d31d6..d937680c0 100644
+index d936d31d6f5..d937680c031 100644
 --- a/torch/serialization.py
 +++ b/torch/serialization.py
 @@ -1005,8 +1005,12 @@ def _legacy_save(obj, f, pickle_module, pickle_protocol) -> None:
@@ -44,6 +44,3 @@ index d936d31d6..d937680c0 100644
      pickler.dump(obj)
      data_value = data_buf.getvalue()
      zip_file.write_record("data.pkl", data_value, len(data_value))
--- 
-2.48.1
-
diff --git a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
index b847ba1a..046b0d60 100644
--- a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
+++ b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch
@@ -1,7 +1,7 @@
-From d5c8df70422afa07dc212266d420f923f5887f99 Mon Sep 17 00:00:00 2001
+From 655f694854c3eafdd631235b60bc6c1b279218ed Mon Sep 17 00:00:00 2001
 From: Mark Harfouche 
 Date: Thu, 3 Oct 2024 22:49:56 -0400
-Subject: [PATCH 07/16] Allow users to overwrite ld with environment variables
+Subject: [PATCH 07/17] Allow users to overwrite ld with environment variables
 
 This should help in the case of cross compilation.
 
@@ -11,7 +11,7 @@ xref: https://github.com/conda-forge/pytorch-cpu-feedstock/pull/261
  1 file changed, 3 insertions(+), 2 deletions(-)
 
 diff --git a/tools/setup_helpers/generate_linker_script.py b/tools/setup_helpers/generate_linker_script.py
-index 11c397a9e..e66fc1970 100644
+index 11c397a9e5f..e66fc197062 100644
 --- a/tools/setup_helpers/generate_linker_script.py
 +++ b/tools/setup_helpers/generate_linker_script.py
 @@ -1,3 +1,4 @@
@@ -30,6 +30,3 @@ index 11c397a9e..e66fc1970 100644
          "\n"
      )
  
--- 
-2.48.1
-
diff --git a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
index 272d200c..55d92733 100644
--- a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
+++ b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch
@@ -1,7 +1,7 @@
-From da7b07f8e3165bf89b08b5a716e539ae9a7afb1a Mon Sep 17 00:00:00 2001
+From f03bf82d9da9cccb2cf4d4833c1a6349622dc37d Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= 
 Date: Wed, 27 Nov 2024 13:47:23 +0100
-Subject: [PATCH 08/16] Allow overriding CUDA-related paths
+Subject: [PATCH 08/17] Allow overriding CUDA-related paths
 
 ---
  cmake/Modules/FindCUDAToolkit.cmake | 2 +-
@@ -9,7 +9,7 @@ Subject: [PATCH 08/16] Allow overriding CUDA-related paths
  2 files changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/cmake/Modules/FindCUDAToolkit.cmake b/cmake/Modules/FindCUDAToolkit.cmake
-index ec9ae530a..b7c0bd9fc 100644
+index ec9ae530aa6..b7c0bd9fc51 100644
 --- a/cmake/Modules/FindCUDAToolkit.cmake
 +++ b/cmake/Modules/FindCUDAToolkit.cmake
 @@ -497,7 +497,7 @@ Result variables
@@ -22,7 +22,7 @@ index ec9ae530a..b7c0bd9fc 100644
    set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}")
    set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}")
 diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py
-index bde41323c..b171837cd 100644
+index bde41323c76..b171837cd4a 100644
 --- a/tools/setup_helpers/cmake.py
 +++ b/tools/setup_helpers/cmake.py
 @@ -252,7 +252,7 @@ class CMake:
@@ -34,6 +34,3 @@ index bde41323c..b171837cd 100644
                  ("EXITCODE", "EXITCODE__TRYRUN_OUTPUT")
              ):
                  build_options[var] = val
--- 
-2.48.1
-
diff --git a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
index e1befef6..f9eef836 100644
--- a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
+++ b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch
@@ -1,7 +1,7 @@
-From 3429795de33cac2e508397dd2d9f5f5c96f185c3 Mon Sep 17 00:00:00 2001
+From 4b1faf6ba142953ce2730766db44f8d98d161ef0 Mon Sep 17 00:00:00 2001
 From: Haifeng Jin 
 Date: Tue, 1 Oct 2024 07:53:24 +0000
-Subject: [PATCH 09/16] Fix test/test_linalg.py for NumPy 2 (#136800)
+Subject: [PATCH 09/17] Fix test/test_linalg.py for NumPy 2 (#136800)
 
 Related to  #107302.
 
@@ -36,7 +36,7 @@ Approved by: https://github.com/lezcano
  1 file changed, 12 insertions(+), 3 deletions(-)
 
 diff --git a/test/test_linalg.py b/test/test_linalg.py
-index e9ec874d6..060bccef2 100644
+index e9ec874d695..060bccef2e5 100644
 --- a/test/test_linalg.py
 +++ b/test/test_linalg.py
 @@ -2351,7 +2351,7 @@ class TestLinalg(TestCase):
@@ -75,6 +75,3 @@ index e9ec874d6..060bccef2 100644
                      reflectors_i[:] = reflectors_tmp.T
                  reflectors = reflectors.view(*A_cpu.shape)
                  tau = tau.view(tau_shape)
--- 
-2.48.1
-
diff --git a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
index bd5aa553..f8a36093 100644
--- a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
+++ b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch
@@ -1,7 +1,7 @@
-From a8ddbe6b682347fdc86c5052b244df4f95b926ac Mon Sep 17 00:00:00 2001
+From 032b9be9ca7f9ae174e75554cecc82600ea3ef54 Mon Sep 17 00:00:00 2001
 From: Haifeng Jin 
 Date: Sat, 12 Oct 2024 02:40:17 +0000
-Subject: [PATCH 10/16] Fixes NumPy 2 test failures in test_torch.py (#137740)
+Subject: [PATCH 10/17] Fixes NumPy 2 test failures in test_torch.py (#137740)
 
 Related to #107302
 
@@ -24,7 +24,7 @@ Approved by: https://github.com/ezyang
  1 file changed, 4 insertions(+), 4 deletions(-)
 
 diff --git a/test/test_torch.py b/test/test_torch.py
-index be4d61808..c6fd6ac9f 100644
+index be4d6180819..c6fd6ac9f19 100644
 --- a/test/test_torch.py
 +++ b/test/test_torch.py
 @@ -2891,7 +2891,7 @@ else:
@@ -58,6 +58,3 @@ index be4d61808..c6fd6ac9f 100644
          )
  
      @skipIfTorchDynamo("np.float64 restored as float32 after graph break.")
--- 
-2.48.1
-
diff --git a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
index 2d9b1995..24fb6b74 100644
--- a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
+++ b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch
@@ -1,7 +1,7 @@
-From 113c9ebec11cba2f1d43bfd4ac03eb02c5c921a8 Mon Sep 17 00:00:00 2001
+From 56f1528fa072023fb2724d5abf8790f2f6cc3aaa Mon Sep 17 00:00:00 2001
 From: Isuru Fernando 
 Date: Wed, 18 Dec 2024 03:59:00 +0000
-Subject: [PATCH 11/16] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
+Subject: [PATCH 11/17] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds
 
 There are two calling conventions for *dotu functions
 
@@ -31,7 +31,7 @@ functional calls.
  1 file changed, 1 insertion(+)
 
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 15c625486..3965416eb 100644
+index 15c62548601..3965416eb29 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
 @@ -182,6 +182,7 @@ elseif(BLAS STREQUAL "OpenBLAS")
@@ -42,6 +42,3 @@ index 15c625486..3965416eb 100644
  elseif(BLAS STREQUAL "BLIS")
    find_package(BLIS REQUIRED)
    include_directories(SYSTEM ${BLIS_INCLUDE_DIR})
--- 
-2.48.1
-
diff --git a/recipe/patches/0012-fix-issue-142484.patch b/recipe/patches/0012-fix-issue-142484.patch
index bb4a2e6e..f0244194 100644
--- a/recipe/patches/0012-fix-issue-142484.patch
+++ b/recipe/patches/0012-fix-issue-142484.patch
@@ -1,7 +1,7 @@
-From 323bb15a6b1f601d79211bd292c26cb886a5d60e Mon Sep 17 00:00:00 2001
+From beba58d724cc1bd7ca73660b0a5ad9e61ae0c562 Mon Sep 17 00:00:00 2001
 From: "Zheng, Zhaoqiong" 
 Date: Fri, 27 Dec 2024 13:49:36 +0800
-Subject: [PATCH 12/16] fix issue 142484
+Subject: [PATCH 12/17] fix issue 142484
 
 From https://github.com/pytorch/pytorch/pull/143894
 ---
@@ -9,7 +9,7 @@ From https://github.com/pytorch/pytorch/pull/143894
  1 file changed, 11 insertions(+), 1 deletion(-)
 
 diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
-index e26cfbf6d..c61b76d32 100644
+index e26cfbf6d8e..c61b76d3205 100644
 --- a/aten/src/ATen/native/mkl/SpectralOps.cpp
 +++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
 @@ -477,7 +477,17 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,
@@ -31,6 +31,3 @@ index e26cfbf6d..c61b76d32 100644
    auto descriptor = _plan_mkl_fft(
        input.strides(), out.strides(), signal_size, input.is_complex(),
        out.is_complex(), normalization, forward, value_type);
--- 
-2.48.1
-
diff --git a/recipe/patches/0013-Fix-FindOpenBLAS.patch b/recipe/patches/0013-Fix-FindOpenBLAS.patch
index 47e34885..dcc5d8c6 100644
--- a/recipe/patches/0013-Fix-FindOpenBLAS.patch
+++ b/recipe/patches/0013-Fix-FindOpenBLAS.patch
@@ -1,14 +1,14 @@
-From 4ca7ade3211380629ab56f3c965edd1b6387d1e0 Mon Sep 17 00:00:00 2001
+From 816a248a4425a97350959e412666e6db9012a52e Mon Sep 17 00:00:00 2001
 From: Bas Zalmstra 
 Date: Thu, 16 May 2024 10:46:49 +0200
-Subject: [PATCH 13/16] Fix FindOpenBLAS
+Subject: [PATCH 13/17] Fix FindOpenBLAS
 
 ---
  cmake/Modules/FindOpenBLAS.cmake | 15 +++++++++------
  1 file changed, 9 insertions(+), 6 deletions(-)
 
 diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake
-index 69d8227ae..0d12185c7 100644
+index 69d8227aea5..0d12185c799 100644
 --- a/cmake/Modules/FindOpenBLAS.cmake
 +++ b/cmake/Modules/FindOpenBLAS.cmake
 @@ -31,22 +31,25 @@ SET(Open_BLAS_LIB_SEARCH_PATHS
@@ -43,6 +43,3 @@ index 69d8227ae..0d12185c7 100644
  
  IF (OpenBLAS_FOUND)
    IF (NOT OpenBLAS_FIND_QUIETLY)
--- 
-2.48.1
-
diff --git a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
index 031fce6d..e6c87bfc 100644
--- a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
+++ b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch
@@ -1,7 +1,7 @@
-From 3b32a078793f06e80d88c356871953f254d4d6c3 Mon Sep 17 00:00:00 2001
+From db896f927403f55a18f931b18a6469cb4e37d322 Mon Sep 17 00:00:00 2001
 From: atalman 
 Date: Tue, 12 Nov 2024 12:28:10 +0000
-Subject: [PATCH 14/16] CD Enable Python 3.13 on windows (#138095)
+Subject: [PATCH 14/17] CD Enable Python 3.13 on windows (#138095)
 
 Adding CD windows. Part of: https://github.com/pytorch/pytorch/issues/130249
 Builder PR landed with smoke test: https://github.com/pytorch/builder/pull/2035
@@ -16,7 +16,7 @@ Cherry-pick-note: minus changes in `.github/*`
  2 files changed, 13 insertions(+), 1 deletion(-)
 
 diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
-index 722618efb..f98818bfd 100644
+index 722618efbb0..f98818bfdcc 100644
 --- a/functorch/csrc/dim/dim.cpp
 +++ b/functorch/csrc/dim/dim.cpp
 @@ -38,6 +38,7 @@ PyObject* Dim_init() {
@@ -28,7 +28,7 @@ index 722618efb..f98818bfd 100644
  #include "internal/pycore_opcode.h"
  #undef Py_BUILD_CORE
 diff --git a/functorch/csrc/dim/dim_opcode.c b/functorch/csrc/dim/dim_opcode.c
-index 81ba62a37..1b5d06773 100644
+index 81ba62a3781..1b5d0677344 100644
 --- a/functorch/csrc/dim/dim_opcode.c
 +++ b/functorch/csrc/dim/dim_opcode.c
 @@ -1,6 +1,17 @@
@@ -50,6 +50,3 @@ index 81ba62a37..1b5d06773 100644
 +#undef NEED_OPCODE_TABLES
 +#undef Py_BUILD_CORE
 +#endif
--- 
-2.48.1
-
diff --git a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
index e8ff9e59..313362f8 100644
--- a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
+++ b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
@@ -1,7 +1,7 @@
-From 4465b713563855e7eb5475758226f3a90f675f55 Mon Sep 17 00:00:00 2001
+From 33790dfbf966e7d8ea4ff6798d2ff92474d84079 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" 
 Date: Thu, 23 Jan 2025 22:46:58 +1100
-Subject: [PATCH 15/16] simplify torch.utils.cpp_extension.include_paths; use
+Subject: [PATCH 15/17] simplify torch.utils.cpp_extension.include_paths; use
  it in cpp_builder
 
 The /TH headers have not existed since pytorch 1.11
@@ -11,7 +11,7 @@ The /TH headers have not existed since pytorch 1.11
  2 files changed, 3 insertions(+), 14 deletions(-)
 
 diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py
-index 95a0bff86..860e7fb06 100644
+index 95a0bff86fd..860e7fb062f 100644
 --- a/torch/_inductor/cpp_builder.py
 +++ b/torch/_inductor/cpp_builder.py
 @@ -743,16 +743,9 @@ def _get_build_args_of_chosen_isa(vec_isa: VecISA) -> Tuple[List[str], List[str]
@@ -35,7 +35,7 @@ index 95a0bff86..860e7fb06 100644
      libraries = []
      if sys.platform != "darwin" and not config.is_fbcode():
 diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
-index aaa45ea4c..3f584ef55 100644
+index aaa45ea4c90..3f584ef5598 100644
 --- a/torch/utils/cpp_extension.py
 +++ b/torch/utils/cpp_extension.py
 @@ -1159,10 +1159,6 @@ def include_paths(cuda: bool = False) -> List[str]:
@@ -49,6 +49,3 @@ index aaa45ea4c..3f584ef55 100644
      ]
      if cuda and IS_HIP_EXTENSION:
          paths.append(os.path.join(lib_include, 'THH'))
--- 
-2.48.1
-
diff --git a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
index fecf4d0f..c3392a83 100644
--- a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
+++ b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch
@@ -1,14 +1,14 @@
-From 4d485fc0a5e3226e528e9dab17b184ff9835a045 Mon Sep 17 00:00:00 2001
+From 799f6fa59dac93dabbbcf72d46f4e1334e3d65d9 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" 
 Date: Thu, 23 Jan 2025 22:58:14 +1100
-Subject: [PATCH 16/16] point include paths to $PREFIX/include
+Subject: [PATCH 16/17] point include paths to $PREFIX/include
 
 ---
  torch/utils/cpp_extension.py | 9 +++++++++
  1 file changed, 9 insertions(+)
 
 diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
-index 3f584ef55..4210f62b6 100644
+index 3f584ef5598..4210f62b6db 100644
 --- a/torch/utils/cpp_extension.py
 +++ b/torch/utils/cpp_extension.py
 @@ -1155,10 +1155,19 @@ def include_paths(cuda: bool = False) -> List[str]:
@@ -31,6 +31,3 @@ index 3f584ef55..4210f62b6 100644
      ]
      if cuda and IS_HIP_EXTENSION:
          paths.append(os.path.join(lib_include, 'THH'))
--- 
-2.48.1
-
diff --git a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
new file mode 100644
index 00000000..9c108494
--- /dev/null
+++ b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
@@ -0,0 +1,31 @@
+From 9f73a02bacf9680833ac64657fde6762d33ab200 Mon Sep 17 00:00:00 2001
+From: Daniel Petry 
+Date: Tue, 21 Jan 2025 17:45:23 -0600
+Subject: [PATCH 17/17] Add conda prefix to inductor include paths
+
+Currently inductor doesn't look in conda's includes and libs. This results in
+errors when it tries to compile if system versions of dependencies (e.g.,
+sleef) are being used.
+
+Note that this is for inductor's JIT mode, not its AOT mode, for which the
+end user provides a _compile_flags.json file.
+
+CONDA_PREFIX is only set in an activated environment; fall back to sys.prefix
+so that running the environment's python directly does not raise a TypeError.
+---
+ torch/_inductor/cpp_builder.py | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py
+index 860e7fb062f..76c61375d91 100644
+--- a/torch/_inductor/cpp_builder.py
++++ b/torch/_inductor/cpp_builder.py
+@@ -1048,6 +1048,8 @@ def get_cpp_torch_options(
+         + python_include_dirs
+         + torch_include_dirs
+         + omp_include_dir_paths
++        # CONDA_PREFIX is unset when the env is not activated; use sys.prefix then
++        + [(os.getenv('CONDA_PREFIX') or sys.prefix) + '/include']
+     )
+     cflags = sys_libs_cflags + omp_cflags
+     ldflags = omp_ldflags