Changes from all commits (31 commits)
42c00f3
ready for mkl rebuild
JamesRobertsonGames Aug 18, 2025
59ec548
ensure mkl version defined locally
JamesRobertsonGames Aug 22, 2025
485c666
stdlibc reintroduced
JamesRobertsonGames Aug 28, 2025
b8ecc9f
remove stdlibc in case it's causing false errors
JamesRobertsonGames Sep 2, 2025
05bc188
zip key fix
JamesRobertsonGames Sep 3, 2025
269a447
add lib overlinking issues
JamesRobertsonGames Sep 3, 2025
2c2c524
libraries that reference the issue overlinks skipped rather than the …
JamesRobertsonGames Sep 3, 2025
468f380
more overlinking avoids
JamesRobertsonGames Sep 3, 2025
10fdc92
remove cuda for time being
JamesRobertsonGames Sep 3, 2025
237986a
ci: trigger build
JamesRobertsonGames Sep 15, 2025
7c9b31c
remove megabuild
JamesRobertsonGames Sep 23, 2025
6f34be3
ensure most uptodate openblas-devel to test glibc 2.28 compatibility
JamesRobertsonGames Sep 24, 2025
734c8bf
ensure stdlib{c}
JamesRobertsonGames Sep 24, 2025
d4ff94e
stdlib for libtorch
JamesRobertsonGames Sep 25, 2025
952939d
add mkl windows patch to rectify mathematical error on windows
JamesRobertsonGames Sep 26, 2025
76090e1
ensure changes from 2.7 ported over
JamesRobertsonGames Oct 24, 2025
a45210a
submodule patch
JamesRobertsonGames Oct 24, 2025
c2ad5e7
psimd patch regen
JamesRobertsonGames Oct 24, 2025
65c4073
remove a/ b/
JamesRobertsonGames Oct 24, 2025
06fa948
patch cmake min version
JamesRobertsonGames Oct 24, 2025
5f80b5d
ensure old version requirements are explicitly passing with cmake args
JamesRobertsonGames Oct 24, 2025
ba34b1d
cmake for windows min version fix
JamesRobertsonGames Oct 24, 2025
4011f9e
ensure cxxflags
JamesRobertsonGames Oct 24, 2025
153aa0b
ensure no ittapi
JamesRobertsonGames Oct 24, 2025
8e2f8e0
remove xnnpack for mac
JamesRobertsonGames Oct 24, 2025
99fed25
libcxx 17 for additional osx requirement
JamesRobertsonGames Oct 28, 2025
b7228ef
reduce cores
JamesRobertsonGames Oct 28, 2025
d90d4fe
bld xnnpack removed on win
JamesRobertsonGames Oct 28, 2025
e12cbe9
force old cmake
JamesRobertsonGames Oct 28, 2025
bce6b64
cmake pin to previous 2.6 build
JamesRobertsonGames Oct 28, 2025
49aacb3
remove duplicate patch and submodule patches
JamesRobertsonGames Oct 29, 2025
3 changes: 0 additions & 3 deletions abs.yaml
@@ -1,6 +1,3 @@
build_env_vars:
ANACONDA_ROCKET_ENABLE_PY313 : yes

# macOS 12.3 or above is required for running the GPU variant (MPS support). No way to specify this for only the GPU
# variant, so it's specified for both.
extra_labels_for_os:
6 changes: 5 additions & 1 deletion recipe/bld.bat
@@ -28,6 +28,8 @@ if "%pytorch_variant%" == "gpu" (
:: cudatoolkit different than the one specified at compile time.
:: https://github.com/conda-forge/pytorch-cpu-feedstock/issues/135
set "USE_KINETO=OFF"
:: ITT fails on submodules due to a stricter cmake policy version requirement
set "USE_ITT=0"

:: =============================== CUDA FLAGS> ======================================
if "%build_with_cuda%" == "" goto cuda_flags_end
@@ -51,13 +53,15 @@ set DISTUTILS_USE_SDK=1
set BUILD_TEST=0
set INSTALL_TEST=0
:: Don't increase MAX_JOBS to NUMBER_OF_PROCESSORS, as it will run out of heap
set CPU_COUNT=1
set CPU_COUNT=2
set MAX_JOBS=%CPU_COUNT%
:: Use our Pybind11, Eigen
set USE_SYSTEM_PYBIND11=1
set USE_SYSTEM_EIGEN_INSTALL=1

set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include
set "CMAKE_ARGS=%CMAKE_ARGS% -DCMAKE_POLICY_VERSION_MINIMUM=3.5"

set LIB=%LIBRARY_PREFIX%\lib;%LIB%

:: =============================== CUDA> ======================================
8 changes: 8 additions & 0 deletions recipe/build.sh
@@ -96,6 +96,9 @@ export Python3_EXECUTABLE="${PYTHON}"
# export CCACHE_BASEDIR=${PREFIX}/../
# export CCACHE_NOHASHDIR=true

# Tell CMake to treat all old version requirements as 3.5+
export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_POLICY_VERSION_MINIMUM=3.5"

for ARG in $CMAKE_ARGS; do
if [[ "$ARG" == "-DCMAKE_"* ]]; then
cmake_arg=$(echo $ARG | cut -d= -f1)
@@ -183,13 +186,18 @@ fi

# MacOS build is simple, and will not be for CUDA
if [[ "$OSTYPE" == "darwin"* ]]; then
# XNNPACK causing issues at build time on osx with libcxx 17
export USE_XNNPACK=0
# Produce macOS builds with torch.distributed support.
# This is enabled by default on Linux, but disabled by default on macOS,
# because it requires an non-bundled compile-time dependency (libuv
# through gloo). This dependency is made available through meta.yaml, so
# we can override the default and set USE_DISTRIBUTED=1.
export USE_DISTRIBUTED=1

# c++ includes are not found in the build prefix by default on osx
export CXXFLAGS="$CXXFLAGS -I${BUILD_PREFIX}/include/c++/v1"

if [[ "$target_platform" == "osx-arm64" ]]; then
# MKLDNN did not support on Apple M1 at the time support Apple M1
# was added. Revisit later
11 changes: 6 additions & 5 deletions recipe/conda_build_config.yaml
@@ -1,21 +1,22 @@
gpu_variant:
- cpu
- metal # [(osx and arm64)]
- cuda-12 # [(linux and x86_64)]
# - cuda-12 # [(linux and x86_64)]
c_compiler_version: # [osx]
- 17 # [osx]
cxx_compiler_version: # [osx]
- 17 # [osx]

# CONDA_BUILD_SYSROOT is defined in the base cbc.yaml, but it's reflected here so we can zip the keys and
# build GPU and CPU at the same time for osx-arm64. It'll need to be manually updated here if the base cbc is changed.
# This could be done using extend_keys instead, with a change to the base cbc.yaml.
# However there's currently a conda-forge bug that prevents this: https://github.com/conda/conda-build/issues/5048
MACOSX_SDK_VERSION: # [(osx and arm64)]
- 11.1 # [(osx and arm64)]
#- 13.3 # [(osx and arm64)]
- 13.3 # [(osx and arm64)]
CONDA_BUILD_SYSROOT: # [(osx and arm64)]
- /Library/Developer/CommandLineTools/SDKs/MacOSX11.1.sdk # [(osx and arm64)]
#- /Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk # [(osx and arm64)]
- /Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk # [(osx and arm64)]
zip_keys: # [(osx and arm64)]
- gpu_variant # [(osx and arm64)]
- MACOSX_SDK_VERSION # [(osx and arm64)]
@@ -27,8 +28,8 @@ zip_keys:  # [(osx and arm64)]
# Conda-forge didn't do a "megabuild" on osx because it pushed their CI runners over their 6-hour limit. We don't have
# such a limit.
megabuild:
- true
#- false # [osx]
#- true
- false

# The version of python to use when building libtorch in a "megabuild"
megabuild_python:
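Reviewer note on the zip_keys mechanics described in the comments above: conda-build pairs the listed variant values index-wise rather than taking their cross product, which is why MACOSX_SDK_VERSION and CONDA_BUILD_SYSROOT must each carry exactly one entry per gpu_variant value. A minimal sketch of the pairing, assuming the final osx-arm64 state above (cpu with the 11.1 SDK, metal with 13.3); this models the behavior only, not conda-build's actual API:

# Sketch only: models conda-build's zip_keys pairing (index-wise, no cross product).
gpu_variant = ["cpu", "metal"]
macosx_sdk_version = ["11.1", "13.3"]
conda_build_sysroot = [
    "/Library/Developer/CommandLineTools/SDKs/MacOSX11.1.sdk",
    "/Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk",
]

# Each tuple becomes one rendered variant: the cpu build gets the 11.1 SDK and
# the metal (MPS) build gets 13.3. Mismatched list lengths are a render error,
# which strict=True (Python 3.10+) mirrors here.
for variant in zip(gpu_variant, macosx_sdk_version, conda_build_sysroot, strict=True):
    print(variant)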
42 changes: 33 additions & 9 deletions recipe/meta.yaml
@@ -2,7 +2,7 @@
{% set sha256 = "3005690eb7b083c443a38c7657938af63902f524ad87a6c83f1aca38c77e3b57" %}
# Set the RC number to build release candidates. Set to None otherwise
{% set rc = None %}
{% set build = 6 %}
{% set build = 7 %}

# Keep this in sync with the release
{% set smoke_test_commit = "1eba9b3aa3c43f86f4a2c807ac8e12c4a7767340" %}
@@ -51,9 +51,12 @@ source:
- patches/0010-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
- patches/0011-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch # [win]
- patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch # [win]
# - patches_submodules/0002-psimd-cmake.patch
# - patches_submodules/0003-fp16-cmake.patch
- patches/0013-simplify-torch.utils.cpp_extension.include_paths-use.patch
- patches/0014-point-include-paths-to-PREFIX-include.patch
- patches/0015-point-lib-paths-to-PREFIX-lib.patch
- patches/0016-fix-issue-142484.patch # [blas_impl == "mkl" and win]
{% endif %}
- url: https://raw.githubusercontent.com/pytorch/pytorch/{{ smoke_test_commit }}/.ci/pytorch/smoke_test/smoke_test.py
folder: smoke_test
@@ -87,7 +90,7 @@ requirements:
- cross-python_{{ target_platform }} # [build_platform != target_platform]
- numpy * # [megabuild and build_platform != target_platform]
- numpy # [not megabuild and build_platform != target_platform]
#- {{ stdlib('c') }}
- {{ stdlib('c') }}
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- {{ compiler('cuda') }} # [(gpu_variant or "").startswith("cuda")]
@@ -114,8 +117,10 @@ requirements:
# This has a strong run_export so we don't need to put it in `host` or `run`
# We use llvm-openmp for openblas variants on osx.
- llvm-openmp 17 # [osx and not (blas_impl == "mkl")]
- libcxx 17 # [osx]
- libuv # [win]
- cmake
- cmake # [not win]
- cmake 3.31.2 # [win]
- ninja-base
- libabseil
# Keep libprotobuf here so that a compatibile version
@@ -159,7 +164,7 @@ requirements:
- future # [py<313]
- six
- mkl-devel {{ mkl }} # [blas_impl == "mkl"]
- openblas-devel {{ openblas }} # [blas_impl == "openblas"]
- openblas-devel {{ openblas }} # [blas_impl == "openblas"]
# - libcblas * *_mkl # [blas_impl == "mkl"]
# - libcblas # [blas_impl != "mkl"]
# - liblapack # [blas_impl != "mkl"]
@@ -197,7 +202,8 @@ test:
# for CMake config to find cuda & nvrtc
- {{ compiler('cuda') }} # [(gpu_variant or "").startswith("cuda")]
- cuda-nvrtc-dev # [(gpu_variant or "").startswith("cuda")]
- cmake
- cmake # [not win]
- cmake 3.31.2 # [win]
- ninja
- pkg-config
files:
@@ -211,14 +217,20 @@
{% for each_lib in ['libc10_cuda', 'libcaffe2_nvrtc', 'libtorch_cuda', 'libtorch_cuda_linalg'] %}
- test -f $PREFIX/lib/{{ each_lib }}.so # [linux and (gpu_variant or "").startswith("cuda")]
{% endfor %}
# test integrity of CMake metadata
# test integrity of CMake metadata
- cd cmake_test
- cmake -GNinja -DCMAKE_CXX_STANDARD=17 $CMAKE_ARGS . # [unix]
- cmake -GNinja -DCMAKE_CXX_STANDARD=17 %CMAKE_ARGS% . # [win]

outputs:
- name: libtorch
build:
overlinking_ignore_patterns: # [linux and aarch64]
- lib/libc10.so # [linux and aarch64]
- lib/libtorch_cpu.so # [linux and aarch64]
- lib/libshm.so # [linux and aarch64]
- lib/libtorch.so # [linux and aarch64]
- bin/torch_shm_manager # [linux and aarch64]
missing_dso_whitelist:
# The are dynamically loaded from %SP_DIR%\torch\lib\
- "**/asmjit.dll" # [win]
@@ -261,7 +273,7 @@ outputs:
- python # [build_platform != target_platform]
- cross-python_{{ target_platform }} # [build_platform != target_platform]
- numpy # [build_platform != target_platform]
#- {{ stdlib('c') }}
- {{ stdlib('c') }}
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- {{ compiler('cuda') }} # [(gpu_variant or "").startswith("cuda")]
@@ -288,7 +300,9 @@
# This has a strong run_export so we don't need to put it in `host` or `run`
# We use llvm-openmp for openblas variants on osx.
- llvm-openmp 17 # [osx and not (blas_impl == "mkl")]
- cmake
- libcxx 17 # [osx]
- cmake # [not win]
- cmake 3.31.2 # [win]
- ninja-base
# Keep libprotobuf here so that a compatibile version
# of protobuf is installed between build and host
@@ -338,6 +352,7 @@
# For openblas on win and linux, we don't specify any openmp implementation; it comes from the compiler.
- intel-openmp {{ mkl }} # [blas_impl == "mkl"]
- llvm-openmp 17 # [osx and not (blas_impl == "mkl")]
- libcxx 17 # [osx]
- libabseil
- libprotobuf {{ libprotobuf }}
- sleef 3.5.1
@@ -353,6 +368,7 @@
run:
- {{ pin_compatible('intel-openmp') }} # [blas_impl == "mkl"]
- llvm-openmp # [osx and not (blas_impl == "mkl")]
- libcxx 17 # [osx]
# GPU requirements without run_exports
- {{ pin_compatible('cudnn') }} # [(gpu_variant or "").startswith("cuda")]
# Required for GPU profiler
@@ -410,6 +426,7 @@
- pybind11
# the inductor "test_aoti_eager..." tests require objcopy
- binutils # [linux]
- libcxx 17 # [osx]
imports:
- torch
source_files:
@@ -452,7 +469,14 @@
# Note that the `|| true` expression will make the build continue even if the whole script falls over completely
# (for example, in the case of missing imports). There doesn't seem to be a way of making a script exception return
# non-zero but failing tests return zero.
- python ./test/run_test.py --core --continue-through-error || true
# ------------------------------------------------------------------------------------------------
# Exclude complex tests that are known to be flaky for -k "not (complex and (linalg_vecdot or dot or vdot))"
# https://github.com/pytorch/pytorch/issues/150918
- python ./test/run_test.py --core --continue-through-error -k "not (complex and (linalg_vecdot or dot or vdot))" || true # [not win]
# lgamma or mvlgamma or multigammaln or gammaln all have these issues on a combination of Intel Xeon processors and Windows Server differences.
# enabling these tests on windows will cause numerical differences in the test suite.
# This is a non-deterministic issue where between 80-110 tests fail. This has been observed between Pytorch 2.5 and above.
- python ./test/run_test.py --core --continue-through-error -k "not ((complex and (linalg_vecdot or dot or vdot)) or lgamma or mvlgamma or multigammaln or gammaln)" || exit 0 # [win]
# The inductor tests test the torch.compile backend. Using the options below avoids running distributed tests,
# which would be run if we used the --inductor option. (Distributed tests would only be correctly run on a multi-gpu test platform,
# which we don't have.)
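Reviewer note on the -k expressions added above: run_test.py forwards -k to pytest, which matches each term as a case-insensitive substring of the test name and combines terms with and/or/not. A rough Python model of how the Windows expression classifies a test name (illustrative only; pytest's real keyword matcher also covers markers and parametrized ids):

def excluded_on_windows(test_name: str) -> bool:
    # Models: -k "not ((complex and (linalg_vecdot or dot or vdot))
    #               or lgamma or mvlgamma or multigammaln or gammaln)"
    name = test_name.lower()
    flaky_complex = "complex" in name and any(
        term in name for term in ("linalg_vecdot", "dot", "vdot")
    )
    flaky_gamma = any(
        term in name for term in ("lgamma", "mvlgamma", "multigammaln", "gammaln")
    )
    return flaky_complex or flaky_gamma

print(excluded_on_windows("test_vdot_complex64"))  # True  -> deselected
print(excluded_on_windows("test_add_float32"))     # False -> runs

The trailing || true (unix) and || exit 0 (win) then keep even a failing test run from failing the package build, as the comment above the commands explains.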
35 changes: 35 additions & 0 deletions recipe/patches/0016-fix-issue-142484.patch
@@ -0,0 +1,35 @@
From 714ead5bf5c7e7ac0f91934232af2e1966b562fb Mon Sep 17 00:00:00 2001
From: "Zheng, Zhaoqiong" <[email protected]>
Date: Fri, 27 Dec 2024 13:49:36 +0800
Subject: [PATCH] fix issue 142484

From https://github.com/pytorch/pytorch/pull/143894
---
aten/src/ATen/native/mkl/SpectralOps.cpp | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp
index e26cfbf6d..c61b76d32 100644
--- a/aten/src/ATen/native/mkl/SpectralOps.cpp
+++ b/aten/src/ATen/native/mkl/SpectralOps.cpp
@@ -477,7 +477,17 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes,

const auto value_type = c10::toRealValueType(input.scalar_type());
out.resize_(batched_out_sizes, MemoryFormat::Contiguous);
-
+ auto astrides = input.strides();
+ bool all_zero = true;
+ for (const auto& stride : astrides) {
+ if (stride != 0) {
+ all_zero = false;
+ break;
+ }
+ }
+ if (all_zero) {
+ input = input.clone(MemoryFormat::Contiguous);
+ }
auto descriptor = _plan_mkl_fft(
input.strides(), out.strides(), signal_size, input.is_complex(),
out.is_complex(), normalization, forward, value_type);
--
2.47.1
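Context for the patch above (upstream pytorch/pytorch#143894, for issue #142484): when every stride of the FFT input is zero, as happens for a tensor materialized through expand, MKL planned the transform against that degenerate layout and could produce wrong results on Windows; the fix clones such inputs to contiguous memory first. A hedged reproducer sketch of the scenario the guard targets (the exact op and dtype in the upstream report may differ):

import torch

base = torch.tensor([1.0 + 1.0j])  # shape (1,), complex dtype
aliased = base.expand(8)           # shape (8,), strides == (0,): all elements alias one value
assert aliased.stride() == (0,)

# With the patch applied, the all-zero-stride input is cloned to contiguous
# memory before the MKL descriptor is planned, so both paths agree.
reference = torch.fft.fft(aliased.contiguous())
result = torch.fft.fft(aliased)
print(torch.allclose(result, reference))  # expected True on a patched build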