diff --git a/README.md b/README.md
index 22e879dc..4ce0f750 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,14 @@ Summary: PyTorch is an optimized tensor library for deep learning using GPUs and
 Development: https://github.com/pytorch/pytorch
+Documentation: https://pytorch.org/docs/
+
+PyTorch is a Python package that provides two high-level features:
+  - Tensor computation (like NumPy) with strong GPU acceleration
+  - Deep neural networks built on a tape-based autograd system
+You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed.
+
+
 Current build status
 ====================
diff --git a/recipe/bld.bat b/recipe/bld.bat
index e4d0bae5..5f6f57c6 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -7,6 +7,12 @@ if EXIST pyproject.toml (
     if %ERRORLEVEL% neq 0 exit 1
 )
+@REM The PyTorch test suite includes some symlinks, which aren't resolved on Windows, leading to packaging errors.
+@REM ATTN! The symlinks change and have to be updated manually, often with each release.
+@REM (No symlinks are currently being packaged. This information is kept here because the issue took some months to find. Look out
+@REM for a failure with the error message: "conda_package_handling.exceptions.ArchiveCreationError: Cannot stat
+@REM while writing file")
+
 set PYTORCH_BUILD_VERSION=%PKG_VERSION%
 @REM Always pass 0 to avoid appending ".post" to version string.
 @REM https://github.com/conda-forge/pytorch-cpu-feedstock/issues/315
@@ -97,6 +103,10 @@ if not "%cuda_compiler_version%" == "None" (
 set DISTUTILS_USE_SDK=1
+@REM Use our pybind11 and Eigen
+set USE_SYSTEM_PYBIND11=1
+set USE_SYSTEM_EIGEN_INSTALL=1
+
 set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include
 set LIB=%LIBRARY_PREFIX%\lib;%LIB%
@@ -128,7 +138,7 @@ set "USE_LITE_PROTO=ON"
 set "USE_OPENMP=OFF"
 @REM The activation script for cuda-nvcc doesnt add the CUDA_CFLAGS on windows.
-@REM Therefor we do this manually here. See:
+@REM Therefore we do this manually here. 
See: @REM https://github.com/conda-forge/cuda-nvcc-feedstock/issues/47 echo "CUDA_CFLAGS=%CUDA_CFLAGS%" set "CUDA_CFLAGS=-I%PREFIX%/Library/include -I%BUILD_PREFIX%/Library/include" @@ -183,19 +193,12 @@ if "%PKG_NAME%" == "libtorch" ( pushd torch-%PKG_VERSION% if %ERRORLEVEL% neq 0 exit 1 - @REM Do not package `fmt.lib` (and its metadata); delete it before the move into - @REM %LIBRARY_BIN% because it may exist in host before installation already - del torch\lib\fmt.lib torch\lib\pkgconfig\fmt.pc - if %ERRORLEVEL% neq 0 exit 1 - @REM also delete rest of fmt metadata - rmdir /s /q torch\lib\cmake\fmt - @REM Move the binaries into the packages site-package directory @REM the only content of torch\bin, {asmjit,fbgemm}.dll, also exists in torch\lib - robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_BIN%\ torch*.dll c10.dll shm.dll asmjit.dll fbgemm.dll + robocopy /NP /NFL /NDL /NJH /E torch\bin\ %LIBRARY_BIN%\ torch*.dll c10.dll shm.dll asmjit.dll fbgemm.dll robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_LIB%\ torch*.lib c10.lib shm.lib asmjit.lib fbgemm.lib if not "%cuda_compiler_version%" == "None" ( - robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_BIN%\ c10_cuda.dll caffe2_nvrtc.dll + robocopy /NP /NFL /NDL /NJH /E torch\bin\ %LIBRARY_BIN%\ c10_cuda.dll caffe2_nvrtc.dll robocopy /NP /NFL /NDL /NJH /E torch\lib\ %LIBRARY_LIB%\ c10_cuda.lib caffe2_nvrtc.lib ) robocopy /NP /NFL /NDL /NJH /E torch\share\ %LIBRARY_PREFIX%\share @@ -216,7 +219,7 @@ if "%PKG_NAME%" == "libtorch" ( if %ERRORLEVEL% neq 0 exit 1 ) else if "%PKG_NAME%" == "pytorch" ( @REM Move libtorch_python and remove the other directories afterwards. - robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\lib\ %LIBRARY_BIN%\ torch_python.dll + robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\bin\ %LIBRARY_BIN%\ torch_python.dll robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\lib\ %LIBRARY_LIB%\ torch_python.lib robocopy /NP /NFL /NDL /NJH /E %SP_DIR%\torch\lib\ %LIBRARY_LIB%\ _C.lib rmdir /s /q %SP_DIR%\torch\lib diff --git a/recipe/build.sh b/recipe/build.sh index 57044b09..648763a1 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -1,9 +1,11 @@ #!/bin/bash -echo "=== Building ${PKG_NAME} (py: ${PY_VER}) ===" - set -ex +echo "#########################################################################" +echo "Building ${PKG_NAME} (py: ${PY_VER}) using BLAS implementation $blas_impl" +echo "#########################################################################" + # This is used to detect if it's in the process of building pytorch export IN_PYTORCH_BUILD=1 @@ -20,9 +22,22 @@ rm -rf pyproject.toml export USE_CUFILE=0 export USE_NUMA=0 export USE_ITT=0 + +#################### ADJUST COMPILER AND LINKER FLAGS ##################### +# Pytorch's build system doesn't like us setting the c++ standard through CMAKE_CXX_FLAGS +# and will issue a warning. We need to use at least C++17 to match the abseil ABI, see +# https://github.com/conda-forge/abseil-cpp-feedstock/issues/45, which pytorch 2.5 uses already: +# https://github.com/pytorch/pytorch/blob/v2.5.1/CMakeLists.txt#L36-L48 +export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-std=c++[0-9][0-9]//g')" +# The below three lines expose symbols that would otherwise be hidden or +# optimised away. 
They were here before, so removing them would potentially
+# break users' programs.
 export CFLAGS="$(echo $CFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
 export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
 export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,--as-needed//g')"
+# The default conda LDFLAGS include -Wl,-dead_strip_dylibs, which removes all the
+# MKL sequential, core, etc. libraries, resulting in a "Symbol not found: _mkl_blas_caxpy"
+# error on osx-64.
 export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,-dead_strip_dylibs//g')"
 export LDFLAGS_LD="$(echo $LDFLAGS_LD | sed 's/-dead_strip_dylibs//g')"
 if [[ "$c_compiler" == "clang" ]]; then
@@ -45,6 +60,7 @@ fi
 # can be imported on system without a GPU
 LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}"
+################ CONFIGURE CMAKE FOR CONDA ENVIRONMENT ###################
 export CMAKE_GENERATOR=Ninja
 export CMAKE_LIBRARY_PATH=$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH
 export CMAKE_PREFIX_PATH=$PREFIX
@@ -73,6 +89,8 @@ export USE_SYSTEM_SLEEF=1
 # use our protobuf
 export BUILD_CUSTOM_PROTOBUF=OFF
 rm -rf $PREFIX/bin/protoc
+export USE_SYSTEM_PYBIND11=1
+export USE_SYSTEM_EIGEN_INSTALL=1
 # prevent six from being downloaded
 > third_party/NNPACK/cmake/DownloadSix.cmake
@@ -98,18 +116,29 @@ if [[ "${CI}" == "github_actions" ]]; then
     # reduce parallelism to avoid getting OOM-killed on
     # cirun-openstack-gpu-2xlarge, which has 32GB RAM, 8 CPUs
     export MAX_JOBS=4
-else
+elif [[ "${CI}" == "azure" ]]; then
     export MAX_JOBS=${CPU_COUNT}
-fi
-
-if [[ "$blas_impl" == "generic" ]]; then
-    # Fake openblas
-    export BLAS=OpenBLAS
-    export OpenBLAS_HOME=${PREFIX}
 else
-    export BLAS=MKL
+    # Leave a spare core for other tasks, per common practice.
+    # Reducing further can help with out-of-memory errors.
+    export MAX_JOBS=$((CPU_COUNT > 1 ? CPU_COUNT - 1 : 1))
 fi
+case "$blas_impl" in
+    "generic")
+        # Fake openblas
+        export BLAS=OpenBLAS
+        export OpenBLAS_HOME=${PREFIX}
+        ;;
+    "mkl")
+        export BLAS=MKL
+        ;;
+    *)
+        echo "[ERROR] Unsupported BLAS implementation '${blas_impl}'" >&2
+        exit 1
+        ;;
+esac
+
 if [[ "$PKG_NAME" == "pytorch" ]]; then
   # Trick Cmake into thinking python hasn't changed
   sed "s/3\.12/$PY_VER/g" build/CMakeCache.txt.orig > build/CMakeCache.txt
@@ -163,12 +192,24 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
       echo "unknown CUDA arch, edit build.sh"
       exit 1
   esac
+
+  # Compatibility matrix for updating this list: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
+  # Warning from pytorch v1.12.1: In the future we will require one to
+  # explicitly pass TORCH_CUDA_ARCH_LIST to cmake instead of implicitly
+  # setting it as an env variable.
+  # Doing this is nontrivial given that we're using setup.py as an entry point, but it should
+  # be addressed to pre-empt the upstream change, which probably won't surface as a failed
+  # configuration.
+  #
+  # See:
+  # https://pytorch.org/docs/stable/cpp_extension.html (Compute capabilities)
+  # https://github.com/pytorch/pytorch/blob/main/.ci/manywheel/build_cuda.sh
   case ${cuda_compiler_version} in
-    12.6)
+    12.[0-6])
       export TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
       ;;
     *)
-      echo "unsupported cuda version. edit build.sh"
+      echo "No CUDA architecture list exists for CUDA v${cuda_compiler_version}. See build.sh for information on adding one."
exit 1
   esac
   export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
@@ -203,7 +244,8 @@ case ${PKG_NAME} in
     mv build/lib.*/torch/bin/* ${PREFIX}/bin/
     mv build/lib.*/torch/lib/* ${PREFIX}/lib/
-    mv build/lib.*/torch/share/* ${PREFIX}/share/
+    # need to merge these now because we're using system pybind11, meaning the destination directory is not empty
+    rsync -a build/lib.*/torch/share/* ${PREFIX}/share/
     mv build/lib.*/torch/include/{ATen,caffe2,tensorpipe,torch,c10} ${PREFIX}/include/
     rm ${PREFIX}/lib/libtorch_python.*
@@ -211,7 +253,7 @@ case ${PKG_NAME} in
     cp build/CMakeCache.txt build/CMakeCache.txt.orig
     ;;
   pytorch)
-    $PREFIX/bin/python -m pip install . --no-deps -vvv --no-clean \
+    $PREFIX/bin/python -m pip install . --no-deps --no-build-isolation -vvv --no-clean \
     | sed "s,${CXX},\$\{CXX\},g" \
     | sed "s,${PREFIX},\$\{PREFIX\},g"
     # Keep this in ${PREFIX}/lib so that the library can be found by
diff --git a/recipe/cmake_test/CMakeLists.txt b/recipe/cmake_test/CMakeLists.txt
new file mode 100644
index 00000000..71684544
--- /dev/null
+++ b/recipe/cmake_test/CMakeLists.txt
@@ -0,0 +1,4 @@
+cmake_minimum_required(VERSION 3.12)
+project(cf_dummy LANGUAGES C CXX)
+find_package(Torch CONFIG REQUIRED)
+find_package(ATen CONFIG REQUIRED)
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index d5fc48f5..81df0c73 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,7 +1,10 @@
 # if you wish to build release candidate number X, append the version string with ".rcX"
 {% set version = "2.5.1" %}
-{% set build = 10 %}
+{% set build = 11 %}
+# Use a higher build number for the CUDA variant, so that it is preferred
+# by conda's solver and preferentially installed where the platform
+# supports it.
 {% if cuda_compiler_version != "None" %}
 {% set build = build + 200 %}
 {% endif %}
@@ -64,6 +67,12 @@ source:
   - patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch
     # point to headers that are now living in $PREFIX/include instead of $SP_DIR/torch/include
   - patches/0016-point-include-paths-to-PREFIX-include.patch
+  - patches/0017-Add-conda-prefix-to-inductor-include-paths.patch
+  - patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch
+  - patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch  # [win]
+    # backport https://github.com/pytorch/pytorch/pull/138579.patch
+  - patches/0020-inductor-Enable-cpp-wrapper-for-test_torchinductor-1.patch
+  - patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch  # [win]
 build:
   number: {{ build }}
@@ -117,6 +126,7 @@ requirements:
     - protobuf
     - make  # [linux]
     - sccache  # [win]
+    - rsync  # [unix]
   host:
     # GPU requirements
     - cudnn  # [cuda_compiler_version != "None"]
@@ -167,6 +177,8 @@ requirements:
     - libuv
     - pkg-config  # [unix]
     - typing_extensions
+    - pybind11
+    - eigen
   run:
     # GPU requirements without run_exports
     - {{ pin_compatible('cudnn') }}  # [cuda_compiler_version != "None"]
@@ -192,6 +204,16 @@ requirements:
 # a particularity of conda-build, that output is defined in
 # the global build stage, including tests
 test:
+  requires:
+    # cmake needs a compiler to run package detection, see
+    # https://discourse.cmake.org/t/questions-about-find-package-cli-msvc/6194
+    - {{ compiler('cxx') }}
+    - {{ compiler('cuda') }}  # [cuda_compiler_version != "None"]
+    - cmake
+    - ninja
+    - pkg-config
+  files:
+    - cmake_test/
   commands:
     # libraries; peculiar formatting to avoid linter false positives about selectors
 {% set torch_libs = [
   - test -f 
$PREFIX/share/cmake/Torch/TorchConfig.cmake # [linux] - if not exist %LIBRARY_PREFIX%\share\cmake\Torch\TorchConfig.cmake exit 1 # [win] + # test integrity of CMake metadata + - cd cmake_test + - cmake -GNinja -DCMAKE_CXX_STANDARD=17 $CMAKE_ARGS . # [unix] + - cmake -GNinja -DCMAKE_CXX_STANDARD=17 %CMAKE_ARGS% . # [win] + outputs: - name: libtorch - name: pytorch @@ -299,6 +326,8 @@ outputs: - pkg-config # [unix] - typing_extensions - {{ pin_subpackage('libtorch', exact=True) }} + - pybind11 + - eigen run: - llvm-openmp # [osx] - intel-openmp {{ mkl }} # [win] @@ -314,6 +343,7 @@ outputs: - filelock - jinja2 - networkx + - pybind11 - nomkl # [blas_impl != "mkl"] - fsspec # avoid that people without GPUs needlessly download ~0.5-1GB @@ -360,6 +390,7 @@ outputs: # tools/ is needed to optimise test run # as of pytorch=2.0.0, there is a bug when trying to run tests without the tools - tools + #- .ci/pytorch/smoke_test/smoke_test.py commands: # Run pip check so as to ensure that all pytorch packages are installed # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/24 @@ -367,6 +398,15 @@ outputs: - python -c "import torch; print(torch.__version__)" - python -c "import torch; assert torch.backends.mkldnn.m.is_available()" # [x86 and cuda_compiler_version == "None"] - python -c "import torch; torch.tensor(1).to('cpu').numpy(); print('numpy support enabled!!!')" + # We have had issues with openmp .dylibs being doubly loaded in certain cases. These two tests catch the (observed) issue + - python -c "import torch; import numpy" + - python -c "import numpy; import torch" + # distributed support is enabled by default on linux; for mac, we enable it manually in build.sh + - python -c "import torch; assert torch.distributed.is_available()" # [linux or osx] + - python -c "import torch; assert torch.backends.cuda.is_built()" # [linux64 and (cuda_compiler_version != "None")] + - python -c "import torch; assert torch.backends.cudnn.is_available()" # [linux64 and (cuda_compiler_version != "None")] + - python -c "import torch; assert torch.cuda.is_available()" # [linux64 and (cuda_compiler_version != "None")] + - python -c "import torch; assert torch.backends.cudnn.enabled" # [linux64 and (cuda_compiler_version != "None")] # At conda-forge, we target versions of OSX that are too old for MPS support # But if users install a newer version of OSX, they will have MPS support # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/123#issuecomment-1186355073 @@ -377,8 +417,32 @@ outputs: - if not exist %LIBRARY_BIN%\torch_python.dll exit 1 # [win] - if not exist %LIBRARY_LIB%\torch_python.lib exit 1 # [win] + # See here for environment variables needed by the smoke test script + # https://github.com/pytorch/pytorch/blob/266fd35c5842902f6304aa8e7713b252cbfb243c/.ci/pytorch/smoke_test/smoke_test.py#L16 + - set MATRIX_GPU_ARCH_VERSION="{{ '.'.join((cuda_compiler_version or "").split('.')[:2]) }}" # [(cuda_compiler_version != "None") and (win)] + - set MATRIX_GPU_ARCH_TYPE="cuda" # [(cuda_compiler_version != "None") and (win)] + - set MATRIX_GPU_ARCH_VERSION="none" # [(cuda_compiler_version == "None") and (win)] + - set MATRIX_GPU_ARCH_TYPE="none" # [(cuda_compiler_version == "None") and (win)] + - set MATRIX_CHANNEL="defaults" # [win] + - set MATRIX_STABLE_VERSION={{ version }} # [win] + - set MATRIX_PACKAGE_TYPE="conda" # [win] + - set TARGET_OS="windows" # [win] + - set OMP_NUM_THREADS=4 # [win] + - export MATRIX_GPU_ARCH_VERSION="{{ '.'.join((cuda_compiler_version or "").split('.')[:2]) }}" # 
[(cuda_compiler_version != "None") and (linux and x86_64)] + - export MATRIX_GPU_ARCH_TYPE="cuda" # [(cuda_compiler_version != "None") and (linux and x86_64)] + - export MATRIX_GPU_ARCH_VERSION="none" # [(cuda_compiler_version == "None") and (not win)] + - export MATRIX_GPU_ARCH_TYPE="none" # [(cuda_compiler_version == "None") and (not win)] + - export MATRIX_CHANNEL="defaults" # [not win] + - export MATRIX_STABLE_VERSION="{{ version }}" # [not win] + - export MATRIX_PACKAGE_TYPE="conda" # [not win] + - export TARGET_OS="linux" # [linux] + - export TARGET_OS="macos-arm64" # [(osx and arm64)] + - export TARGET_OS="macos-x86_64" # [(osx and x86_64)] + - export OMP_NUM_THREADS=4 # [not win] + #- python ./smoke_test/smoke_test.py --package torchonly + # a reasonably safe subset of tests that should run under 15 minutes - # disable hypothesis because it randomly yields health check errors + # The inductor tests test torch.compile {% set tests = " ".join([ "test/test_autograd.py", "test/test_autograd_fallback.py", @@ -389,8 +453,7 @@ outputs: "test/test_nn.py", "test/test_torch.py", "test/test_xnnpack_integration.py", - "-m \"not hypothesis\"", - ]) %} + ] + (cuda_compiler_version != "None") * ["test/inductor/test_torchinductor.py"]) %} {% set skips = "(TestTorch and test_print)" %} # tolerance violation with openblas @@ -416,6 +479,9 @@ outputs: {% set skips = skips ~ " or test_BCELoss_weights_no_reduce_cuda" %} # [unix and cuda_compiler_version != "None"] {% set skips = skips ~ " or test_ctc_loss_cudnn_tensor_cuda " %} # [unix and cuda_compiler_version != "None"] {% set skips = skips ~ " or (TestTorch and test_index_add_correctness)" %} # [unix and cuda_compiler_version != "None"] + # These tests require higher-resource or more recent GPUs than the CI provides + {% set skips = skips ~ " or (TritonCodeGenTests and test_sdpa_inference_mode_aot_compile)" %} # [unix and cuda_compiler_version != "None"] + {% set skips = skips ~ " or (TestNN and test_grid_sample)" %} # [unix and cuda_compiler_version != "None"] # MKL problems {% set skips = skips ~ " or (TestLinalgCPU and test_inverse_errors_large_cpu)" %} # [unix and blas_impl == "mkl" and cuda_compiler_version != "None"] # these tests are failing with low -n values @@ -438,8 +504,9 @@ outputs: # for potential packaging problems by running a fixed subset - export OMP_NUM_THREADS=4 # [unix] # reduced paralellism to avoid OOM; test only one python version on aarch because emulation is super-slow - - python -m pytest -n 2 {{ tests }} -k "not ({{ skips }})" --durations=50 # [unix and (not aarch64 or py==312)] - - python -m pytest -v -s {{ tests }} -k "not ({{ skips }})" --durations=50 # [win] + # disable hypothesis because it randomly yields health check errors + - python -m pytest -n 2 {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50 # [unix and (not aarch64 or py==312)] + - python -m pytest -v -s {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50 # [win] # regression test for https://github.com/conda-forge/pytorch-cpu-feedstock/issues/329, where we picked up # duplicate `.pyc` files due to newest py-ver (3.13) in the build environment not matching the one in host; @@ -479,8 +546,13 @@ about: license_file: - LICENSE - NOTICE - - third_party/pybind11/LICENSE summary: PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. 
+ description: | + PyTorch is a Python package that provides two high-level features: + - Tensor computation (like NumPy) with strong GPU acceleration + - Deep neural networks built on a tape-based autograd system + You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. + doc_url: https://pytorch.org/docs/ extra: recipe-maintainers: diff --git a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch index 4ce6492a..b5519b81 100644 --- a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch +++ b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch @@ -1,14 +1,14 @@ -From 756045fca376345e48afb6a868b502dbfa0c584c Mon Sep 17 00:00:00 2001 +From f3a0f9aab6dce56eea590b946f60256014b61bf7 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Sun, 1 Sep 2024 17:35:40 -0400 -Subject: [PATCH 01/16] Force usage of python 3 and error without numpy +Subject: [PATCH 01/20] Force usage of python 3 and error without numpy --- cmake/Dependencies.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index e78305e0a..15c625486 100644 +index e78305e0a8e..15c62548601 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -861,9 +861,9 @@ if(BUILD_PYTHON) @@ -32,6 +32,3 @@ index e78305e0a..15c625486 100644 caffe2_update_option(USE_NUMPY OFF) else() caffe2_update_option(USE_NUMPY ON) --- -2.48.1 - diff --git a/recipe/patches/0002-Help-find-numpy.patch b/recipe/patches/0002-Help-find-numpy.patch index 6f3fa2c3..833af9f1 100644 --- a/recipe/patches/0002-Help-find-numpy.patch +++ b/recipe/patches/0002-Help-find-numpy.patch @@ -1,14 +1,14 @@ -From 70661ad52cb2f0290de3e0758f240560e4b1e769 Mon Sep 17 00:00:00 2001 +From 21c30036b5b86f403c0cf4426165d9a6a50edb1a Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 1 Oct 2024 00:28:40 -0400 -Subject: [PATCH 02/16] Help find numpy +Subject: [PATCH 02/20] Help find numpy --- tools/setup_helpers/cmake.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py -index 4b605fe59..bde41323c 100644 +index 4b605fe5975..bde41323c76 100644 --- a/tools/setup_helpers/cmake.py +++ b/tools/setup_helpers/cmake.py @@ -305,9 +305,15 @@ class CMake: @@ -27,6 +27,3 @@ index 4b605fe59..bde41323c 100644 TORCH_BUILD_VERSION=version, **build_options, ) --- -2.48.1 - diff --git a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch index af8662e4..a4c44e01 100644 --- a/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch +++ b/recipe/patches/0003-Add-USE_SYSTEM_NVTX-option-138287.patch @@ -1,7 +1,7 @@ -From 4ae61d17c81e9d66e091c2790ac6deae6bf31204 Mon Sep 17 00:00:00 2001 +From d1826af525db41eda5020a1404f5d5521d67a5dc Mon Sep 17 00:00:00 2001 From: Jeongseok Lee Date: Sat, 19 Oct 2024 04:26:01 +0000 -Subject: [PATCH 03/16] Add USE_SYSTEM_NVTX option (#138287) +Subject: [PATCH 03/20] Add USE_SYSTEM_NVTX option (#138287) ## Summary @@ -21,7 +21,7 @@ Approved by: https://github.com/albanD 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index 98593c2de..ae3c3f2cb 100644 +index 98593c2de97..ae3c3f2cbd5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -470,6 +470,7 @@ option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." 
OFF) @@ -41,7 +41,7 @@ index 98593c2de..ae3c3f2cb 100644 # /Z7 override option When generating debug symbols, CMake default to use the diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake -index afc1bc12a..152fbdbe6 100644 +index afc1bc12abf..152fbdbe6dd 100644 --- a/cmake/public/cuda.cmake +++ b/cmake/public/cuda.cmake @@ -170,7 +170,11 @@ else() @@ -58,7 +58,7 @@ index afc1bc12a..152fbdbe6 100644 if(nvtx3_FOUND) add_library(torch::nvtx3 INTERFACE IMPORTED) diff --git a/setup.py b/setup.py -index 2b0cfa99d..7174777ed 100644 +index 2b0cfa99d71..7174777ed4e 100644 --- a/setup.py +++ b/setup.py @@ -183,7 +183,21 @@ @@ -84,6 +84,3 @@ index 2b0cfa99d..7174777ed 100644 # # USE_MIMALLOC # Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free. --- -2.48.1 - diff --git a/recipe/patches/0004-Update-sympy-version.patch b/recipe/patches/0004-Update-sympy-version.patch index 5dd72f7c..81a66b3f 100644 --- a/recipe/patches/0004-Update-sympy-version.patch +++ b/recipe/patches/0004-Update-sympy-version.patch @@ -1,14 +1,14 @@ -From 2c6db02c01ad080c8dc8ae0b78be2b93099c2ac8 Mon Sep 17 00:00:00 2001 +From e3219c5fe8834753b0cf9e92be4d1ef1e874f370 Mon Sep 17 00:00:00 2001 From: Jeongseok Lee Date: Thu, 17 Oct 2024 15:04:05 -0700 -Subject: [PATCH 04/16] Update sympy version +Subject: [PATCH 04/20] Update sympy version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py -index 7174777ed..65be34e39 100644 +index 7174777ed4e..65be34e39b1 100644 --- a/setup.py +++ b/setup.py @@ -1158,7 +1158,7 @@ def main(): @@ -20,6 +20,3 @@ index 7174777ed..65be34e39 100644 "networkx", "jinja2", "fsspec", --- -2.48.1 - diff --git a/recipe/patches/0005-Fix-duplicate-linker-script.patch b/recipe/patches/0005-Fix-duplicate-linker-script.patch index 7cc82435..cb09dcdf 100644 --- a/recipe/patches/0005-Fix-duplicate-linker-script.patch +++ b/recipe/patches/0005-Fix-duplicate-linker-script.patch @@ -1,14 +1,14 @@ -From fa5bb8f1acd0195efadc35c8fbb9199be92932d9 Mon Sep 17 00:00:00 2001 +From 08a1f44fbc81324aa98d720dfb7b87a261923ac2 Mon Sep 17 00:00:00 2001 From: Jeongseok Lee Date: Sun, 3 Nov 2024 01:12:36 -0700 -Subject: [PATCH 05/16] Fix duplicate linker script +Subject: [PATCH 05/20] Fix duplicate linker script --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py -index 65be34e39..b0e01e0d1 100644 +index 65be34e39b1..b0e01e0d1ee 100644 --- a/setup.py +++ b/setup.py @@ -1184,7 +1184,9 @@ def main(): @@ -22,6 +22,3 @@ index 65be34e39..b0e01e0d1 100644 os.environ["CFLAGS"] = ( os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections" ) --- -2.48.1 - diff --git a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch index cddb8b68..326e6285 100644 --- a/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch +++ b/recipe/patches/0006-fix-3.13-pickle-error-in-serialization.py-136034.patch @@ -1,7 +1,7 @@ -From 6fc695312cd062e13c2482b52ae8d028bd7c043a Mon Sep 17 00:00:00 2001 +From 15df314a41c69a31c0443254d5552aa1b39d708d Mon Sep 17 00:00:00 2001 From: William Wen Date: Fri, 13 Sep 2024 13:02:33 -0700 -Subject: [PATCH 06/16] fix 3.13 pickle error in serialization.py (#136034) +Subject: [PATCH 06/20] fix 3.13 pickle error in serialization.py (#136034) Error encountered when adding dynamo 3.13 support. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/136034 @@ -11,7 +11,7 @@ Approved by: https://github.com/albanD 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/torch/serialization.py b/torch/serialization.py -index d936d31d6..d937680c0 100644 +index d936d31d6f5..d937680c031 100644 --- a/torch/serialization.py +++ b/torch/serialization.py @@ -1005,8 +1005,12 @@ def _legacy_save(obj, f, pickle_module, pickle_protocol) -> None: @@ -44,6 +44,3 @@ index d936d31d6..d937680c0 100644 pickler.dump(obj) data_value = data_buf.getvalue() zip_file.write_record("data.pkl", data_value, len(data_value)) --- -2.48.1 - diff --git a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch index b847ba1a..ad215aa9 100644 --- a/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch +++ b/recipe/patches/0007-Allow-users-to-overwrite-ld-with-environment-variabl.patch @@ -1,7 +1,7 @@ -From d5c8df70422afa07dc212266d420f923f5887f99 Mon Sep 17 00:00:00 2001 +From 655f694854c3eafdd631235b60bc6c1b279218ed Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Thu, 3 Oct 2024 22:49:56 -0400 -Subject: [PATCH 07/16] Allow users to overwrite ld with environment variables +Subject: [PATCH 07/20] Allow users to overwrite ld with environment variables This should help in the case of cross compilation. @@ -11,7 +11,7 @@ xref: https://github.com/conda-forge/pytorch-cpu-feedstock/pull/261 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/setup_helpers/generate_linker_script.py b/tools/setup_helpers/generate_linker_script.py -index 11c397a9e..e66fc1970 100644 +index 11c397a9e5f..e66fc197062 100644 --- a/tools/setup_helpers/generate_linker_script.py +++ b/tools/setup_helpers/generate_linker_script.py @@ -1,3 +1,4 @@ @@ -30,6 +30,3 @@ index 11c397a9e..e66fc1970 100644 "\n" ) --- -2.48.1 - diff --git a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch index 272d200c..fbfe0560 100644 --- a/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch +++ b/recipe/patches/0008-Allow-overriding-CUDA-related-paths.patch @@ -1,7 +1,7 @@ -From da7b07f8e3165bf89b08b5a716e539ae9a7afb1a Mon Sep 17 00:00:00 2001 +From f03bf82d9da9cccb2cf4d4833c1a6349622dc37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 27 Nov 2024 13:47:23 +0100 -Subject: [PATCH 08/16] Allow overriding CUDA-related paths +Subject: [PATCH 08/20] Allow overriding CUDA-related paths --- cmake/Modules/FindCUDAToolkit.cmake | 2 +- @@ -9,7 +9,7 @@ Subject: [PATCH 08/16] Allow overriding CUDA-related paths 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/FindCUDAToolkit.cmake b/cmake/Modules/FindCUDAToolkit.cmake -index ec9ae530a..b7c0bd9fc 100644 +index ec9ae530aa6..b7c0bd9fc51 100644 --- a/cmake/Modules/FindCUDAToolkit.cmake +++ b/cmake/Modules/FindCUDAToolkit.cmake @@ -497,7 +497,7 @@ Result variables @@ -22,7 +22,7 @@ index ec9ae530a..b7c0bd9fc 100644 set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}") set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}") diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py -index bde41323c..b171837cd 100644 +index bde41323c76..b171837cd4a 100644 --- a/tools/setup_helpers/cmake.py +++ b/tools/setup_helpers/cmake.py @@ -252,7 +252,7 @@ class CMake: @@ -34,6 +34,3 @@ index bde41323c..b171837cd 
100644 ("EXITCODE", "EXITCODE__TRYRUN_OUTPUT") ): build_options[var] = val --- -2.48.1 - diff --git a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch index e1befef6..580fe42a 100644 --- a/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch +++ b/recipe/patches/0009-Fix-test-test_linalg.py-for-NumPy-2-136800.patch @@ -1,7 +1,7 @@ -From 3429795de33cac2e508397dd2d9f5f5c96f185c3 Mon Sep 17 00:00:00 2001 +From 4b1faf6ba142953ce2730766db44f8d98d161ef0 Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Tue, 1 Oct 2024 07:53:24 +0000 -Subject: [PATCH 09/16] Fix test/test_linalg.py for NumPy 2 (#136800) +Subject: [PATCH 09/20] Fix test/test_linalg.py for NumPy 2 (#136800) Related to #107302. @@ -36,7 +36,7 @@ Approved by: https://github.com/lezcano 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/test_linalg.py b/test/test_linalg.py -index e9ec874d6..060bccef2 100644 +index e9ec874d695..060bccef2e5 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -2351,7 +2351,7 @@ class TestLinalg(TestCase): @@ -75,6 +75,3 @@ index e9ec874d6..060bccef2 100644 reflectors_i[:] = reflectors_tmp.T reflectors = reflectors.view(*A_cpu.shape) tau = tau.view(tau_shape) --- -2.48.1 - diff --git a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch index bd5aa553..6495b150 100644 --- a/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch +++ b/recipe/patches/0010-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch @@ -1,7 +1,7 @@ -From a8ddbe6b682347fdc86c5052b244df4f95b926ac Mon Sep 17 00:00:00 2001 +From 032b9be9ca7f9ae174e75554cecc82600ea3ef54 Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Sat, 12 Oct 2024 02:40:17 +0000 -Subject: [PATCH 10/16] Fixes NumPy 2 test failures in test_torch.py (#137740) +Subject: [PATCH 10/20] Fixes NumPy 2 test failures in test_torch.py (#137740) Related to #107302 @@ -24,7 +24,7 @@ Approved by: https://github.com/ezyang 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_torch.py b/test/test_torch.py -index be4d61808..c6fd6ac9f 100644 +index be4d6180819..c6fd6ac9f19 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -2891,7 +2891,7 @@ else: @@ -58,6 +58,3 @@ index be4d61808..c6fd6ac9f 100644 ) @skipIfTorchDynamo("np.float64 restored as float32 after graph break.") --- -2.48.1 - diff --git a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch index 2d9b1995..193ce159 100644 --- a/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch +++ b/recipe/patches/0011-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch @@ -1,7 +1,7 @@ -From 113c9ebec11cba2f1d43bfd4ac03eb02c5c921a8 Mon Sep 17 00:00:00 2001 +From 56f1528fa072023fb2724d5abf8790f2f6cc3aaa Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 18 Dec 2024 03:59:00 +0000 -Subject: [PATCH 11/16] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds +Subject: [PATCH 11/20] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds There are two calling conventions for *dotu functions @@ -31,7 +31,7 @@ functional calls. 
1 file changed, 1 insertion(+) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 15c625486..3965416eb 100644 +index 15c62548601..3965416eb29 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -182,6 +182,7 @@ elseif(BLAS STREQUAL "OpenBLAS") @@ -42,6 +42,3 @@ index 15c625486..3965416eb 100644 elseif(BLAS STREQUAL "BLIS") find_package(BLIS REQUIRED) include_directories(SYSTEM ${BLIS_INCLUDE_DIR}) --- -2.48.1 - diff --git a/recipe/patches/0012-fix-issue-142484.patch b/recipe/patches/0012-fix-issue-142484.patch index bb4a2e6e..00f1e3d2 100644 --- a/recipe/patches/0012-fix-issue-142484.patch +++ b/recipe/patches/0012-fix-issue-142484.patch @@ -1,7 +1,7 @@ -From 323bb15a6b1f601d79211bd292c26cb886a5d60e Mon Sep 17 00:00:00 2001 +From beba58d724cc1bd7ca73660b0a5ad9e61ae0c562 Mon Sep 17 00:00:00 2001 From: "Zheng, Zhaoqiong" Date: Fri, 27 Dec 2024 13:49:36 +0800 -Subject: [PATCH 12/16] fix issue 142484 +Subject: [PATCH 12/20] fix issue 142484 From https://github.com/pytorch/pytorch/pull/143894 --- @@ -9,7 +9,7 @@ From https://github.com/pytorch/pytorch/pull/143894 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp -index e26cfbf6d..c61b76d32 100644 +index e26cfbf6d8e..c61b76d3205 100644 --- a/aten/src/ATen/native/mkl/SpectralOps.cpp +++ b/aten/src/ATen/native/mkl/SpectralOps.cpp @@ -477,7 +477,17 @@ static Tensor& _exec_fft(Tensor& out, const Tensor& self, IntArrayRef out_sizes, @@ -31,6 +31,3 @@ index e26cfbf6d..c61b76d32 100644 auto descriptor = _plan_mkl_fft( input.strides(), out.strides(), signal_size, input.is_complex(), out.is_complex(), normalization, forward, value_type); --- -2.48.1 - diff --git a/recipe/patches/0013-Fix-FindOpenBLAS.patch b/recipe/patches/0013-Fix-FindOpenBLAS.patch index 47e34885..f539d0a6 100644 --- a/recipe/patches/0013-Fix-FindOpenBLAS.patch +++ b/recipe/patches/0013-Fix-FindOpenBLAS.patch @@ -1,14 +1,14 @@ -From 4ca7ade3211380629ab56f3c965edd1b6387d1e0 Mon Sep 17 00:00:00 2001 +From 816a248a4425a97350959e412666e6db9012a52e Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Thu, 16 May 2024 10:46:49 +0200 -Subject: [PATCH 13/16] Fix FindOpenBLAS +Subject: [PATCH 13/20] Fix FindOpenBLAS --- cmake/Modules/FindOpenBLAS.cmake | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake -index 69d8227ae..0d12185c7 100644 +index 69d8227aea5..0d12185c799 100644 --- a/cmake/Modules/FindOpenBLAS.cmake +++ b/cmake/Modules/FindOpenBLAS.cmake @@ -31,22 +31,25 @@ SET(Open_BLAS_LIB_SEARCH_PATHS @@ -43,6 +43,3 @@ index 69d8227ae..0d12185c7 100644 IF (OpenBLAS_FOUND) IF (NOT OpenBLAS_FIND_QUIETLY) --- -2.48.1 - diff --git a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch index 031fce6d..7a2df88f 100644 --- a/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch +++ b/recipe/patches/0014-CD-Enable-Python-3.13-on-windows-138095.patch @@ -1,7 +1,7 @@ -From 3b32a078793f06e80d88c356871953f254d4d6c3 Mon Sep 17 00:00:00 2001 +From db896f927403f55a18f931b18a6469cb4e37d322 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 12 Nov 2024 12:28:10 +0000 -Subject: [PATCH 14/16] CD Enable Python 3.13 on windows (#138095) +Subject: [PATCH 14/20] CD Enable Python 3.13 on windows (#138095) Adding CD windows. 
Part of: https://github.com/pytorch/pytorch/issues/130249 Builder PR landed with smoke test: https://github.com/pytorch/builder/pull/2035 @@ -16,7 +16,7 @@ Cherry-pick-note: minus changes in `.github/*` 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp -index 722618efb..f98818bfd 100644 +index 722618efbb0..f98818bfdcc 100644 --- a/functorch/csrc/dim/dim.cpp +++ b/functorch/csrc/dim/dim.cpp @@ -38,6 +38,7 @@ PyObject* Dim_init() { @@ -28,7 +28,7 @@ index 722618efb..f98818bfd 100644 #include "internal/pycore_opcode.h" #undef Py_BUILD_CORE diff --git a/functorch/csrc/dim/dim_opcode.c b/functorch/csrc/dim/dim_opcode.c -index 81ba62a37..1b5d06773 100644 +index 81ba62a3781..1b5d0677344 100644 --- a/functorch/csrc/dim/dim_opcode.c +++ b/functorch/csrc/dim/dim_opcode.c @@ -1,6 +1,17 @@ @@ -50,6 +50,3 @@ index 81ba62a37..1b5d06773 100644 +#undef NEED_OPCODE_TABLES +#undef Py_BUILD_CORE +#endif --- -2.48.1 - diff --git a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch index e8ff9e59..3736ca78 100644 --- a/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch +++ b/recipe/patches/0015-simplify-torch.utils.cpp_extension.include_paths-use.patch @@ -1,7 +1,7 @@ -From 4465b713563855e7eb5475758226f3a90f675f55 Mon Sep 17 00:00:00 2001 +From 33790dfbf966e7d8ea4ff6798d2ff92474d84079 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 23 Jan 2025 22:46:58 +1100 -Subject: [PATCH 15/16] simplify torch.utils.cpp_extension.include_paths; use +Subject: [PATCH 15/20] simplify torch.utils.cpp_extension.include_paths; use it in cpp_builder The /TH headers have not existed since pytorch 1.11 @@ -11,7 +11,7 @@ The /TH headers have not existed since pytorch 1.11 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py -index 95a0bff86..860e7fb06 100644 +index 95a0bff86fd..860e7fb062f 100644 --- a/torch/_inductor/cpp_builder.py +++ b/torch/_inductor/cpp_builder.py @@ -743,16 +743,9 @@ def _get_build_args_of_chosen_isa(vec_isa: VecISA) -> Tuple[List[str], List[str] @@ -35,7 +35,7 @@ index 95a0bff86..860e7fb06 100644 libraries = [] if sys.platform != "darwin" and not config.is_fbcode(): diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py -index aaa45ea4c..3f584ef55 100644 +index aaa45ea4c90..3f584ef5598 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -1159,10 +1159,6 @@ def include_paths(cuda: bool = False) -> List[str]: @@ -49,6 +49,3 @@ index aaa45ea4c..3f584ef55 100644 ] if cuda and IS_HIP_EXTENSION: paths.append(os.path.join(lib_include, 'THH')) --- -2.48.1 - diff --git a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch index fecf4d0f..764e24af 100644 --- a/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch +++ b/recipe/patches/0016-point-include-paths-to-PREFIX-include.patch @@ -1,14 +1,14 @@ -From 4d485fc0a5e3226e528e9dab17b184ff9835a045 Mon Sep 17 00:00:00 2001 +From 799f6fa59dac93dabbbcf72d46f4e1334e3d65d9 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Thu, 23 Jan 2025 22:58:14 +1100 -Subject: [PATCH 16/16] point include paths to $PREFIX/include +Subject: [PATCH 16/20] point include paths to $PREFIX/include --- torch/utils/cpp_extension.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py -index 3f584ef55..4210f62b6 100644 +index 3f584ef5598..4210f62b6db 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -1155,10 +1155,19 @@ def include_paths(cuda: bool = False) -> List[str]: @@ -31,6 +31,3 @@ index 3f584ef55..4210f62b6 100644 ] if cuda and IS_HIP_EXTENSION: paths.append(os.path.join(lib_include, 'THH')) --- -2.48.1 - diff --git a/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch new file mode 100644 index 00000000..e2111c54 --- /dev/null +++ b/recipe/patches/0017-Add-conda-prefix-to-inductor-include-paths.patch @@ -0,0 +1,27 @@ +From 9f73a02bacf9680833ac64657fde6762d33ab200 Mon Sep 17 00:00:00 2001 +From: Daniel Petry +Date: Tue, 21 Jan 2025 17:45:23 -0600 +Subject: [PATCH 17/20] Add conda prefix to inductor include paths + +Currently inductor doesn't look in conda's includes and libs. This results in +errors when it tries to compile, if system versions are being used of +dependencies (e.g., sleef). + +Note that this is for inductor's JIT mode, not its AOT mode, for which the +end user provides a _compile_flags.json file. +--- + torch/_inductor/cpp_builder.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py +index 860e7fb062f..76c61375d91 100644 +--- a/torch/_inductor/cpp_builder.py ++++ b/torch/_inductor/cpp_builder.py +@@ -1048,6 +1048,7 @@ def get_cpp_torch_options( + + python_include_dirs + + torch_include_dirs + + omp_include_dir_paths ++ + [os.getenv('CONDA_PREFIX') + '/include'] + ) + cflags = sys_libs_cflags + omp_cflags + ldflags = omp_ldflags diff --git a/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch b/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch new file mode 100644 index 00000000..028d79be --- /dev/null +++ b/recipe/patches/0018-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch @@ -0,0 +1,25 @@ +From b0cfa0f728e96a3a9d6f7434e2c02d74d6daa9a9 Mon Sep 17 00:00:00 2001 +From: "H. 
Vetinari" +Date: Tue, 28 Jan 2025 14:15:34 +1100 +Subject: [PATCH 18/20] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX + +we cannot set CMAKE_INSTALL_PREFIX without the pytorch build complaining, but we can +use TORCH_INSTALL_PREFIX, which is set correctly relative to our CMake files already: +https://github.com/pytorch/pytorch/blob/v2.5.1/cmake/TorchConfig.cmake.in#L47 +--- + aten/src/ATen/CMakeLists.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt +index 6d9152a4d07..aa4dd7b05cc 100644 +--- a/aten/src/ATen/CMakeLists.txt ++++ b/aten/src/ATen/CMakeLists.txt +@@ -563,7 +563,7 @@ if(USE_ROCM) + # list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB) + endif() + +-set(ATEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}") ++set(ATEN_INCLUDE_DIR "${TORCH_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}") + configure_file(ATenConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake") + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake" + DESTINATION "${AT_INSTALL_SHARE_DIR}/cmake/ATen") diff --git a/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch b/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch new file mode 100644 index 00000000..7aa41192 --- /dev/null +++ b/recipe/patches/0019-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch @@ -0,0 +1,158 @@ +From f7db4cbfb0af59027ed8bdcd0387dba6fbcb1192 Mon Sep 17 00:00:00 2001 +From: "H. Vetinari" +Date: Tue, 28 Jan 2025 10:58:29 +1100 +Subject: [PATCH 19/20] remove `DESTINATION lib` from CMake `install(TARGETS` + directives + +Suggested-By: Silvio Traversaro +--- + c10/CMakeLists.txt | 2 +- + c10/cuda/CMakeLists.txt | 2 +- + c10/hip/CMakeLists.txt | 2 +- + c10/xpu/CMakeLists.txt | 2 +- + caffe2/CMakeLists.txt | 18 +++++++++--------- + torch/CMakeLists.txt | 2 +- + torch/lib/libshm_windows/CMakeLists.txt | 2 +- + 7 files changed, 15 insertions(+), 15 deletions(-) + +diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt +index 80e172497d5..d7f8987020d 100644 +--- a/c10/CMakeLists.txt ++++ b/c10/CMakeLists.txt +@@ -162,7 +162,7 @@ if(NOT BUILD_LIBTORCHLESS) + # Note: for now, we will put all export path into one single Caffe2Targets group + # to deal with the cmake deployment need. Inside the Caffe2Targets set, the + # individual libraries like libc10.so and libcaffe2.so are still self-contained. +- install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib) ++ install(TARGETS c10 EXPORT Caffe2Targets) + endif() + + install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} +diff --git a/c10/cuda/CMakeLists.txt b/c10/cuda/CMakeLists.txt +index 3327dab4779..9336c9e8f77 100644 +--- a/c10/cuda/CMakeLists.txt ++++ b/c10/cuda/CMakeLists.txt +@@ -82,7 +82,7 @@ if(NOT BUILD_LIBTORCHLESS) + # Note: for now, we will put all export path into one single Caffe2Targets group + # to deal with the cmake deployment need. Inside the Caffe2Targets set, the + # individual libraries like libc10.so and libcaffe2.so are still self-contained. 
+-install(TARGETS c10_cuda EXPORT Caffe2Targets DESTINATION lib) ++install(TARGETS c10_cuda EXPORT Caffe2Targets) + + endif() + +diff --git a/c10/hip/CMakeLists.txt b/c10/hip/CMakeLists.txt +index f153030e793..514c6d29266 100644 +--- a/c10/hip/CMakeLists.txt ++++ b/c10/hip/CMakeLists.txt +@@ -55,7 +55,7 @@ if(NOT BUILD_LIBTORCHLESS) + $ + $ + $) +- install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib) ++ install(TARGETS c10_hip EXPORT Caffe2Targets) + set(C10_HIP_LIB c10_hip) + endif() + +diff --git a/c10/xpu/CMakeLists.txt b/c10/xpu/CMakeLists.txt +index 01f77d61713..437ade657f9 100644 +--- a/c10/xpu/CMakeLists.txt ++++ b/c10/xpu/CMakeLists.txt +@@ -45,7 +45,7 @@ if(NOT BUILD_LIBTORCHLESS) + $ + $ + ) +- install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib) ++ install(TARGETS c10_xpu EXPORT Caffe2Targets) + set(C10_XPU_LIB c10_xpu) + add_subdirectory(test) + endif() +diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt +index 9be7f3732f3..b51c7cc637b 100644 +--- a/caffe2/CMakeLists.txt ++++ b/caffe2/CMakeLists.txt +@@ -549,7 +549,7 @@ if(USE_CUDA) + endif() + + target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS}) +- install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++ install(TARGETS caffe2_nvrtc) + if(USE_NCCL) + list(APPEND Caffe2_GPU_SRCS + ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) +@@ -609,7 +609,7 @@ if(USE_ROCM) + target_link_libraries(caffe2_nvrtc ${PYTORCH_HIP_LIBRARIES} ${ROCM_HIPRTC_LIB}) + target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR}) + target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__) +- install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++ install(TARGETS caffe2_nvrtc) + endif() + + if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER) +@@ -995,7 +995,7 @@ elseif(USE_CUDA) + CUDA::culibos ${CMAKE_DL_LIBS}) + endif() + set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG") +- install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++ install(TARGETS torch_cuda_linalg) + endif() + + if(USE_PRECOMPILED_HEADERS) +@@ -1467,17 +1467,17 @@ endif() + + caffe2_interface_library(torch torch_library) + +-install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets) + + if(USE_CUDA) +- install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++ install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets) + elseif(USE_ROCM) +- install(TARGETS torch_hip torch_hip_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++ install(TARGETS torch_hip torch_hip_library EXPORT Caffe2Targets) + elseif(USE_XPU) +- install(TARGETS torch_xpu torch_xpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++ install(TARGETS torch_xpu torch_xpu_library EXPORT Caffe2Targets) + endif() + +-install(TARGETS torch torch_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++install(TARGETS torch torch_library EXPORT Caffe2Targets) + + target_link_libraries(torch PUBLIC torch_cpu_library) + +@@ -1616,7 +1616,7 @@ if(BUILD_SHARED_LIBS) + target_link_libraries(torch_global_deps torch::nvtoolsext) + endif() + endif() +- install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++ install(TARGETS torch_global_deps) + endif() + + # ---[ Caffe2 
HIP sources. +diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt +index c74b45431c9..80fb5e7734e 100644 +--- a/torch/CMakeLists.txt ++++ b/torch/CMakeLists.txt +@@ -447,7 +447,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "") + set_target_properties(torch_python PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS}) + endif() + +-install(TARGETS torch_python DESTINATION "${TORCH_INSTALL_LIB_DIR}") ++install(TARGETS torch_python) + + # Generate torch/version.py from the appropriate CMake cache variables. + if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") +diff --git a/torch/lib/libshm_windows/CMakeLists.txt b/torch/lib/libshm_windows/CMakeLists.txt +index df2a1064938..5fa15e6be31 100644 +--- a/torch/lib/libshm_windows/CMakeLists.txt ++++ b/torch/lib/libshm_windows/CMakeLists.txt +@@ -19,7 +19,7 @@ target_include_directories(shm PRIVATE + target_link_libraries(shm torch c10) + + +-install(TARGETS shm DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}") ++install(TARGETS shm) + install(FILES libshm.h DESTINATION "include") + + if(MSVC AND BUILD_SHARED_LIBS) diff --git a/recipe/patches/0020-inductor-Enable-cpp-wrapper-for-test_torchinductor-1.patch b/recipe/patches/0020-inductor-Enable-cpp-wrapper-for-test_torchinductor-1.patch new file mode 100644 index 00000000..68753e5c --- /dev/null +++ b/recipe/patches/0020-inductor-Enable-cpp-wrapper-for-test_torchinductor-1.patch @@ -0,0 +1,272 @@ +From c06d20d68d0190967494c08df93207828af71628 Mon Sep 17 00:00:00 2001 +From: Bin Bao +Date: Mon, 28 Oct 2024 07:44:46 -0700 +Subject: [PATCH 20/20] [inductor] Enable cpp wrapper for test_torchinductor + (#138579) + +Summary: Expand cpp wrapper testing to test_torchinductor. Using skip_cpp_wrapper to skip failing tests for now, and fixes are coming later. + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/138579 +Approved by: https://github.com/chenyang78, https://github.com/benjaminglass1 + +[Cherry-pick note: dropped changes in .ci/pytorch/test.sh] +--- + test/inductor/test_torchinductor.py | 47 +++++++++++++++++++++++++++-- + 1 file changed, 44 insertions(+), 3 deletions(-) + +diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py +index 610f5d27332..5c3c50e5a70 100644 +--- a/test/inductor/test_torchinductor.py ++++ b/test/inductor/test_torchinductor.py +@@ -689,7 +689,9 @@ def assertGeneratedKernelCountEqual(self: TestCase, expected: int): + # and non-persistent reduction kernels for the same node schedule. + # That will mess up with the kernel count. Just don't check it. + return +- if config.cpp_wrapper: ++ if config.cpp_wrapper and self.device != "cpu": ++ # FIXME: cpp wrapper codegen for cuda is done in two passes. Update ++ # this once we move to the new one-pass solution. 
+ expected *= 2 + self.assertEqual(torch._inductor.metrics.generated_kernel_count, expected) + +@@ -767,6 +769,16 @@ def skip_if_gpu_halide(fn): + return wrapper + + ++def skip_if_cpp_wrapper(fn): ++ @functools.wraps(fn) ++ def wrapper(self): ++ if config.cpp_wrapper: ++ raise unittest.SkipTest("cpp wrapper bug to be fixed") ++ return fn(self) ++ ++ return wrapper ++ ++ + @instantiate_parametrized_tests + class CommonTemplate: + def test_bool(self): +@@ -1362,6 +1374,7 @@ class CommonTemplate: + + @config.patch({"fx_graph_cache": False}) + @skipIfWindows(msg="torch._dynamo.exc.Unsupported") ++ @skip_if_cpp_wrapper + def test_forced_buffer_realize(self): + # Test torch._test_inductor_realize forces a buffer to be realized + def fn(a): +@@ -1373,6 +1386,7 @@ class CommonTemplate: + + @config.patch({"fx_graph_cache": False}) + @skipIfWindows(msg="torch._dynamo.exc.Unsupported") ++ @skip_if_cpp_wrapper + def test_scheduler_vertical_fusion1(self): + realize = test_operators.realize + +@@ -2966,6 +2980,7 @@ class CommonTemplate: + self.common(fn, (torch.randn(8, 8), torch.randn(8, 8))) + + @skip_if_halide # only 32-bit indexing ++ @skip_if_cpp_wrapper # OOM + def test_large_tensor_reduction(self): + if not _has_sufficient_memory(self.device, 4.5 * 1024**3): # 4.5 GiB + raise unittest.SkipTest("insufficient memory") +@@ -2987,6 +3002,7 @@ class CommonTemplate: + self.assertEqual(actual, expect) + + @skip_if_gpu_halide # only 32-bit indexing ++ @skip_if_cpp_wrapper # OOM + def test_large_broadcast_reduction(self): + if self.device == "cpu": + raise unittest.SkipTest("Fails on CPU") +@@ -3009,6 +3025,7 @@ class CommonTemplate: + self.assertEqual(actual, expect) + + @skip_if_halide # only 32-bit indexing ++ @skip_if_cpp_wrapper # OOM + def test_large_pointwise(self): + if not _has_sufficient_memory(self.device, 2 * (2**31 + 1)): + raise unittest.SkipTest("insufficient memory") +@@ -3045,6 +3062,7 @@ class CommonTemplate: + self.assertTrue((actual == 4).all()) + + @skip_if_halide # only 32-bit indexing ++ @skip_if_cpp_wrapper # OOM + def test_large_strided_reduction(self): + # Test 64-bit indexing is used when input numel is less than INT_MAX + # but stride calculations go above INT_MAX +@@ -3317,6 +3335,7 @@ class CommonTemplate: + ) + + @with_tf32_off ++ @skip_if_cpp_wrapper + @config.patch(use_mixed_mm=True) + def test_uint4x2_mixed_mm(self): + def fn(a, b): +@@ -3346,10 +3365,12 @@ class CommonTemplate: + t2 = torch.arange(9, dtype=torch.int64, device=self.device).view(3, 3) + + msg = "expected .* and .* to have the same dtype, but got: .* != .*" +- with self.assertRaisesRegex(RuntimeError, msg): +- torch.compile(fn)(t1, t2) + with self.assertRaisesRegex(RuntimeError, msg): + fn(t1, t2) ++ if config.cpp_wrapper: ++ msg = "aoti_torch_.* API call failed at .*" ++ with self.assertRaisesRegex(RuntimeError, msg): ++ torch.compile(fn)(t1, t2) + + @skipIfXpu + def test_linear_mixed_dtype(self): +@@ -3368,6 +3389,8 @@ class CommonTemplate: + msg = "expected .* and .* to have the same dtype, but got: .* != .*" + with self.assertRaisesRegex(RuntimeError, msg): + fn(t) ++ if config.cpp_wrapper: ++ msg = "aoti_torch_.* API call failed at .*" + with self.assertRaisesRegex(RuntimeError, msg): + with torch.no_grad(): + torch.compile(fn)(t) +@@ -5065,6 +5088,7 @@ class CommonTemplate: + if self.device != "cpu": + assertGeneratedKernelCountEqual(self, 1) + ++ @skip_if_cpp_wrapper + def test_complex_fallback(self): + def fn(x): + return x * x + 10 +@@ -5389,6 +5413,7 @@ class CommonTemplate: + ) + + 
@torch._dynamo.config.patch(capture_dynamic_output_shape_ops=True) ++ @skip_if_cpp_wrapper + def test_nonzero_unbacked_refinement(self): + def fn(x): + z = x.nonzero() +@@ -5456,6 +5481,7 @@ class CommonTemplate: + (torch.randn([1, 3, 3, 16]).to(memory_format=torch.channels_last),), + ) + ++ @skip_if_cpp_wrapper + def test_cat_uint8(self): + def fn(x): + batch_shape = x.shape[:1] +@@ -7846,6 +7872,7 @@ class CommonTemplate: + self.assertTrue((d < 1).all()) + + @config.patch(implicit_fallbacks=True) ++ @skip_if_cpp_wrapper + def test_fallback_mutable_op_basic(self): + with torch.library._scoped_library("mylib", "FRAGMENT") as m: + +@@ -7956,6 +7983,7 @@ class CommonTemplate: + self.assertEqual(cloned_args, args) + + @config.patch(implicit_fallbacks=True) ++ @skip_if_cpp_wrapper + def test_fallback_mutable_op_list(self): + with torch.library._scoped_library("mylib", "FRAGMENT") as m: + +@@ -8082,6 +8110,7 @@ class CommonTemplate: + + # Already on by default, just want to make sure + @patch.object(torch._inductor.config, "allow_buffer_reuse", True) ++ @skip_if_cpp_wrapper + def test_reuse_buffers_with_aliasing(self): + def f(x): + z = x + 1 +@@ -8164,6 +8193,7 @@ class CommonTemplate: + self.common(fn, [torch.zeros([20, 20])]) + + @config.patch(check_stack_no_cycles_TESTING_ONLY=True) ++ @skip_if_cpp_wrapper + def test_check_stack_no_cycles(self): + @torch.compile() + def fn(x): +@@ -8579,6 +8609,7 @@ class CommonTemplate: + result = fn(torch.randn([1, 2, 16, 4]).requires_grad_()) + result.sum().backward() + ++ @skip_if_cpp_wrapper + def test_dropout2(self): + n = 100000 + weight = torch.ones( +@@ -8638,6 +8669,7 @@ class CommonTemplate: + self.assertTrue(same(g2, g3)) + + @config.patch(search_autotune_cache=False) ++ @skip_if_cpp_wrapper + def test_dropout3(self): + m = torch.nn.Sequential( + torch.nn.Linear(32, 32, bias=False), +@@ -8664,6 +8696,7 @@ class CommonTemplate: + self.assertEqual(bw_code.count("tl.rand"), 0) + self.assertEqual(torch._inductor.metrics.generated_kernel_count, 4) + ++ @skip_if_cpp_wrapper + def test_randint_kernel_count(self): + @torch._dynamo.optimize_assert("inductor") + def fn1(): +@@ -9302,6 +9335,7 @@ class CommonTemplate: + for x in (torch.randn(2, 3), torch.randn(2, 2), torch.randn(3, 2)): + self.common(fn, (x,)) + ++ @skip_if_cpp_wrapper + def test_kwargs(self): + if self.device == GPU_TYPE: + raise unittest.SkipTest("histogramdd only supports cpu") +@@ -10627,6 +10661,7 @@ class CommonTemplate: + + @requires_gpu() + @config.patch(implicit_fallbacks=True) ++ @skip_if_cpp_wrapper + def test_mutable_custom_op_fixed_layout2(self): + with torch.library._scoped_library("mylib", "DEF") as lib: + mod = nn.Conv2d(3, 128, 1, stride=1, bias=False).to(device=GPU_TYPE) +@@ -10680,6 +10715,7 @@ class CommonTemplate: + self.assertNotEqual(bar_strides[0], expected_stride) + + @config.patch(implicit_fallbacks=True) ++ @skip_if_cpp_wrapper + def test_mutable_custom_op_fixed_layout(self): + with torch.library._scoped_library("mylib", "DEF") as lib: + lib.define( +@@ -11007,6 +11043,7 @@ class CommonTemplate: + assertGeneratedKernelCountEqual(self, 1) + + @expectedFailureCodegenDynamic ++ @skip_if_cpp_wrapper + def test_reinterpret_dtypeview(self): + @torch.compile + def fn(x, x2): +@@ -11827,6 +11864,7 @@ if HAS_GPU and not TEST_WITH_ASAN: + self.assertFalse("out_ptr0" in code) + self.assertEqual(fn_opt(*inps), fn(*inps)) + ++ @skip_if_cpp_wrapper + def test_numpy_on_gpu(self): + x = np.arange(10, dtype=np.float32) + +@@ -12210,6 +12248,7 @@ if HAS_GPU and not TEST_WITH_ASAN: 
+
+     @patch("torch._inductor.config.comment_origin", True)
+     @patch("torch._functorch.config.max_dist_from_bw", 0)
++    @skip_if_cpp_wrapper
+     def test_inductor_sequence_nr(self):
+         class Model(torch.nn.Module):
+             def __init__(self) -> None:
+@@ -12356,6 +12395,7 @@ if HAS_GPU and not TEST_WITH_ASAN:
+
+     class NanCheckerTest(TestCase):
+         @config.patch("nan_asserts", True)
++        @skip_if_cpp_wrapper
+         def test_nan_checker_pass(self):
+             def f(x):
+                 return torch.softmax(x, dim=-1)
+@@ -12375,6 +12415,7 @@ if HAS_GPU and not TEST_WITH_ASAN:
+             )
+
+         @config.patch("nan_asserts", True)
++        @skip_if_cpp_wrapper
+         def test_nan_checker_fail(self):
+             def f(x):
+                 return torch.softmax(x, dim=-1)
diff --git a/recipe/patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch b/recipe/patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
new file mode 100644
index 00000000..665cc74e
--- /dev/null
+++ b/recipe/patches_submodules/0001-remove-DESTINATION-lib-from-CMake-install-directives.patch
@@ -0,0 +1,25 @@
+From a9879bdd5ea793c5301a4b86f163a07e1f28f321 Mon Sep 17 00:00:00 2001
+From: "H. Vetinari" 
+Date: Tue, 28 Jan 2025 13:32:28 +1100
+Subject: [PATCH] remove `DESTINATION lib` from CMake install directives
+
+Suggested-By: Silvio Traversaro 
+---
+ CMakeLists.txt | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/third_party/fbgemm/CMakeLists.txt b/third_party/fbgemm/CMakeLists.txt
+index 134523e7..86fb8fad 100644
+--- a/third_party/fbgemm/CMakeLists.txt
++++ b/third_party/fbgemm/CMakeLists.txt
+@@ -370,8 +370,8 @@ if(MSVC)
+     FILES $<TARGET_PDB_FILE:fbgemm> $<TARGET_PDB_FILE:asmjit>
+     DESTINATION ${CMAKE_INSTALL_LIBDIR} OPTIONAL)
+   endif()
+-  install(TARGETS fbgemm DESTINATION ${CMAKE_INSTALL_LIBDIR})
+-  install(TARGETS asmjit DESTINATION ${CMAKE_INSTALL_LIBDIR})
++  install(TARGETS fbgemm)
++  install(TARGETS asmjit)
+ endif()
+
+ #Make project importable from the build directory
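As an aside on the new recipe/cmake_test/CMakeLists.txt: it only verifies that find_package(Torch CONFIG REQUIRED) and find_package(ATen CONFIG REQUIRED) resolve against the packaged CMake metadata. A fuller consumer-side check would also compile against the located package, following the usage that TorchConfig.cmake itself documents (TORCH_LIBRARIES and TORCH_CXX_FLAGS are set by TorchConfig.cmake). The sketch below is illustrative only; the torch_smoke target and main.cpp are hypothetical names, not part of this recipe:

    cmake_minimum_required(VERSION 3.12)
    project(torch_smoke LANGUAGES CXX)

    # TorchConfig.cmake is located via CMAKE_PREFIX_PATH (passed as $CMAKE_ARGS in the test)
    find_package(Torch CONFIG REQUIRED)

    # TORCH_CXX_FLAGS carries e.g. the ABI define; append it before creating targets
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

    # main.cpp could simply construct a torch::Tensor to prove that linking works
    add_executable(torch_smoke main.cpp)
    target_link_libraries(torch_smoke PRIVATE ${TORCH_LIBRARIES})
    set_property(TARGET torch_smoke PROPERTY CXX_STANDARD 17)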