Merged

54 commits
6facae4
Add some comments to avoid duplicated error-finding
danpetry Jan 7, 2025
63aa7dd
Don't override upstream C++ standard
danpetry Jan 7, 2025
81363c0
Use N-1 cores
danpetry Jan 7, 2025
e8d6740
Add fall-through for blas if-else construct
danpetry Jan 7, 2025
e1f50ac
Don't isolate build
danpetry Jan 7, 2025
9fcb3a7
Unvendor pybind11 and eigen
danpetry Jan 7, 2025
ca90fa0
Add and run upstream smoke test
danpetry Jan 7, 2025
ff040de
Use unittest-xml-reporting instead of the historical xmlrunner
danpetry Jan 14, 2025
a2694a9
Add some testing for openmp bug, python api and torch.compile
danpetry Jan 14, 2025
6074128
reinstate `MAX_JOBS=${CPU_COUNT}` on azure
h-vetinari Jan 15, 2025
407a78d
Add description and doc_url
danpetry Jan 14, 2025
393d0d5
correct selector, move comment
danpetry Jan 14, 2025
0f9b587
Revert "Use N-1 cores"
danpetry Jan 17, 2025
4feaee5
Use N-1 cores by default
danpetry Jan 21, 2025
0fe0ba4
Remove CMAKE_ARGS conversion to individual env variables; legacy, rem…
danpetry Jan 21, 2025
14ca3d2
Hand TORCH_CUDA_ARCH_LIST to cmake rather than setting as an env vari…
danpetry Jan 21, 2025
eaaae74
Improve smoke test env variable comment
danpetry Jan 21, 2025
e19af70
Format patch with git format-patch, rename for clarity
danpetry Jan 21, 2025
939dae1
Bump version
danpetry Jan 21, 2025
af73dbe
Correct build number comment
danpetry Jan 21, 2025
65bcd3b
add build dependency on rsync rather than using platform
danpetry Jan 21, 2025
a26ede2
Don't use selectors in jinja expressions [ci skip] ***NO_CI***
danpetry Jan 23, 2025
03b2fc7
Add back disabling hypothesis tests
danpetry Jan 23, 2025
f3bfe5f
Hand contents of CMAKE_ARGS directly to CMake, to remove TORCH_CUDA_A…
danpetry Jan 23, 2025
a811bb2
Temporarily remove smoke test; it's not in the pytorch repo for v2.5.1
danpetry Jan 23, 2025
bd450c9
Revert "Hand contents of CMAKE_ARGS directly to CMake, to remove TORC…
danpetry Jan 24, 2025
6f49c62
Revert "Remove CMAKE_ARGS conversion to individual env variables; leg…
danpetry Jan 24, 2025
10bfd83
Revert "Use unittest-xml-reporting instead of the historical xmlrunner"
danpetry Jan 24, 2025
46f4e8e
appease linter
danpetry Jan 24, 2025
c1c1e6c
Merge branch 'main' into anaconda-sync
h-vetinari Jan 25, 2025
3a34b59
remove obsolete fmt handling in bld.bat
h-vetinari Jan 26, 2025
e19e11c
add pybind11 as a run-dependency of pytorch
h-vetinari Jan 26, 2025
5f19e7f
build non-CUDA builds on CPU agents
h-vetinari Jan 26, 2025
f6bbd00
MNT: Re-rendered with conda-build 25.1.1, conda-smithy 3.45.4, and co…
h-vetinari Jan 26, 2025
9864e70
Combine header messages into one
danpetry Jan 27, 2025
a002741
Change blas_impl if-else block into a case block instead
danpetry Jan 27, 2025
226f526
Correct cpu/gpu build config
danpetry Jan 27, 2025
ad37dec
MNT: Re-rendered with conda-build 25.1.1, conda-smithy 3.45.4, and co…
danpetry Jan 27, 2025
59af084
Revert "Correct cpu/gpu build config"
danpetry Jan 27, 2025
78b5aca
Run all linux builds on a gpu machine
danpetry Jan 27, 2025
db810d0
MNT: Re-rendered with conda-build 25.1.1, conda-smithy 3.45.4, and co…
danpetry Jan 27, 2025
a623264
update comment about `-std=c++<ver>` flag
h-vetinari Jan 27, 2025
e44b0d5
limit torch.backends CUDA availability tests to linux64
h-vetinari Jan 27, 2025
415a628
Revert change to hand TORCH_CUDA_ARCH_LIST as an environment variable…
danpetry Jan 28, 2025
0a2094f
set CMAKE_INSTALL_PREFIX; used in some installation commands
h-vetinari Jan 28, 2025
3889dee
fix install location of DLLs in CMake metadata; add test
h-vetinari Jan 28, 2025
982cadf
unset CMAKE_INSTALL_PREFIX again; instead, patch ATEN_INCLUDE_DIR
h-vetinari Jan 28, 2025
4683abe
distinguish destinations between lib & bin on windows
h-vetinari Jan 28, 2025
e91c713
do not modify destination for `install(FILES`
h-vetinari Jan 28, 2025
46d06c1
back to deleting DESTINATION, but only for TARGETS
h-vetinari Jan 28, 2025
cdabb36
remove Caffee2 from cmake_test; imported through torch anyway
h-vetinari Jan 28, 2025
89d7354
Skip tests failing due to unsupported GPUs [ci skip] ***NO_CI***
danpetry Jan 28, 2025
8a92b36
backport patch for skipping some inducer test failures
h-vetinari Jan 28, 2025
8338fd7
add cuda compiler for CMake tests of CUDA-enabled libtorch
h-vetinari Jan 29, 2025
10 changes: 10 additions & 0 deletions recipe/bld.bat
@@ -4,6 +4,12 @@ setlocal enabledelayedexpansion
REM remove pyproject.toml to avoid installing deps from pip
if EXIST pyproject.toml DEL pyproject.toml

:: The PyTorch test suite includes some symlinks, which aren't resolved on Windows, leading to packaging errors.
:: ATTN! These change and have to be updated manually, often with each release.
:: (No symlinks are currently being packaged. This note is kept because the issue took months to track down. Look out
:: for a failure with the error message: "conda_package_handling.exceptions.ArchiveCreationError: <somefile> Cannot stat
:: while writing file".)
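As a quick aid when this resurfaces: the symlinks can be enumerated from a unix checkout of the same sources (a hypothetical helper, not part of this recipe; git records the symlinks regardless of platform):

# List symlinks in the source tree that Windows packaging would not resolve.
find . -type l -print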

set TH_BINARY_BUILD=1
set PYTORCH_BUILD_VERSION=%PKG_VERSION%
:: Always pass 0 to avoid appending ".post" to version string.
@@ -106,6 +112,10 @@ if not "%cuda_compiler_version%" == "None" (

set DISTUTILS_USE_SDK=1

:: Use our Pybind11, Eigen
set USE_SYSTEM_PYBIND11=1
set USE_SYSTEM_EIGEN_INSTALL=1

set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include
set LIB=%LIBRARY_PREFIX%\lib;%LIB%

45 changes: 40 additions & 5 deletions recipe/build.sh
@@ -4,6 +4,10 @@ echo "=== Building ${PKG_NAME} (py: ${PY_VER}) ==="

set -ex

echo "####################################################################"
echo "Building PyTorch using BLAS implementation: $blas_impl "
echo "####################################################################"

# This is used to detect whether we're in the process of building pytorch
export IN_PYTORCH_BUILD=1

@@ -20,9 +24,21 @@ rm -rf pyproject.toml
export USE_CUFILE=0
export USE_NUMA=0
export USE_ITT=0

#################### ADJUST COMPILER AND LINKER FLAGS #####################
# Pytorch's build system doesn't like us setting the c++ standard and will
# issue a warning:
# https://github.com/pytorch/pytorch/blob/3beb7006dd5a415dfa236081ad5d55ae38346324/CMakeLists.txt#L41
export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-std=c++[0-9][0-9]//g')"
# The below three lines expose symbols that would otherwise be hidden or
# optimised away. They were here before, so removing them would potentially
# break users' programs
export CFLAGS="$(echo $CFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,--as-needed//g')"
# The default conda LDFLAGS include -Wl,-dead_strip_dylibs, which removes all the
# MKL sequential, core, etc. libraries, resulting in a "Symbol not found: _mkl_blas_caxpy"
# error on osx-64.
export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,-dead_strip_dylibs//g')"
export LDFLAGS_LD="$(echo $LDFLAGS_LD | sed 's/-dead_strip_dylibs//g')"
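For illustration, a minimal standalone demo of the sed scrubbing above, with hypothetical flag values:

CXXFLAGS="-O2 -std=c++17 -fvisibility-inlines-hidden"
CXXFLAGS="$(echo $CXXFLAGS | sed 's/-std=c++[0-9][0-9]//g')"   # drop the standard flag
echo "$CXXFLAGS"                                               # -> "-O2  -fvisibility-inlines-hidden"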
if [[ "$c_compiler" == "clang" ]]; then
@@ -45,6 +61,7 @@ fi
# can be imported on system without a GPU
LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}"

################ CONFIGURE CMAKE FOR CONDA ENVIRONMENT ###################
export CMAKE_GENERATOR=Ninja
export CMAKE_LIBRARY_PATH=$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH
export CMAKE_PREFIX_PATH=$PREFIX
@@ -62,6 +79,7 @@ done
CMAKE_FIND_ROOT_PATH+=";$SRC_DIR"
unset CMAKE_INSTALL_PREFIX
export TH_BINARY_BUILD=1
# Use our build version and number for inserting into binaries
export PYTORCH_BUILD_VERSION=$PKG_VERSION
# Always pass 0 to avoid appending ".post" to version string.
# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/315
@@ -74,6 +92,8 @@ export USE_SYSTEM_SLEEF=1
# use our protobuf
export BUILD_CUSTOM_PROTOBUF=OFF
rm -rf $PREFIX/bin/protoc
export USE_SYSTEM_PYBIND11=1
export USE_SYSTEM_EIGEN_INSTALL=1
Comment on lines +92 to +93

Member: What happens to the PyTorch vendored copies of these when using the system copies?

Contributor Author: It ignores them: pybind11, eigen. Although in the case of eigen it will use the vendored copy if it doesn't find the system one, rather than erroring, which can mean accidental vendoring if you don't look at the logs carefully, which isn't great IMHO.

Contributor Author (danpetry, Jan 17, 2025): We have a patch to prevent this for mkl, but I thought it best to look at the blas/openmp stuff closer and maybe address it in a different PR.

Contributor Author: are you ok with this?
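A log check along these lines can guard against that silent fallback (a hypothetical sketch; it assumes the configure step records the chosen Eigen include dir in CMakeCache.txt):

# Fail loudly if CMake picked up the vendored Eigen instead of $PREFIX's copy.
if grep -i "eigen" build/CMakeCache.txt | grep -q "third_party"; then
    echo "ERROR: vendored Eigen was used" >&2
    exit 1
fi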


# prevent six from being downloaded
> third_party/NNPACK/cmake/DownloadSix.cmake
@@ -99,16 +119,23 @@ if [[ "${CI}" == "github_actions" ]]; then
# reduce parallelism to avoid getting OOM-killed on
# cirun-openstack-gpu-2xlarge, which has 32GB RAM, 8 CPUs
export MAX_JOBS=4
else
elif [[ "${CI}" == "azure" ]]; then
export MAX_JOBS=${CPU_COUNT}
else
# Leave a spare core for other tasks. This may need to be reduced further
# if we get out of memory errors.
export MAX_JOBS=$((CPU_COUNT > 1 ? CPU_COUNT - 1 : 1))
Member: What happens if CPU_COUNT is undefined or an empty string?

Member: What CI provider do things run on for Anaconda? I think it might need to become specific to that, and not sit in the overall else: clause here. Our OSX builds on Azure are successful within the 6h limit, but seem to fail with this PR. This is a likely culprit.

Contributor Author: It's AWS. Happy to just leave it as-is. I think originally we had it maxed at four cores and then increased it to CPU_COUNT-1 out of common practice and a desire to speed things up, rather than for concrete technical reasons. We can leave this as-is and, if we have issues on our end, solve them later.

Member: I've fixed our case in 6074128, so I have no objection to doing what helps you in the else: branch.

Contributor Author: Have made this a default in the else branch.

Member: Note that CPU_COUNT is a passthrough environment variable, so one can set it in their CI scripts to the intended value and conda-build will use it. This is what conda-forge does. Instead of adding this to the build recipe, I would recommend doing it in Anaconda's CI scripts.

Contributor Author: CPU_COUNT isn't being set here. If it's set (somewhere else, like in CI as you suggest) then it'll be used to set MAX_JOBS. If it's not set, this expression will evaluate to 1.
fi

if [[ "$blas_impl" == "generic" ]]; then
# Fake openblas
export BLAS=OpenBLAS
export OpenBLAS_HOME=${PREFIX}
else
elif [[ "$blas_impl" == "mkl" ]]; then
export BLAS=MKL
else
echo "[ERROR] Unsupported BLAS implementation '${blas_impl}'" >&2
exit 1
fi

if [[ "$PKG_NAME" == "pytorch" ]]; then
@@ -164,12 +191,19 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
echo "unknown CUDA arch, edit build.sh"
exit 1
esac
# Warning from pytorch v1.12.1: In the future we will require one to
# explicitly pass TORCH_CUDA_ARCH_LIST to cmake instead of implicitly
# setting it as an env variable.
#
# See:
# https://pytorch.org/docs/stable/cpp_extension.html (Compute capabilities)
# https://github.com/pytorch/pytorch/blob/main/.ci/manywheel/build_cuda.sh
case ${cuda_compiler_version} in
12.6)
export TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
;;
*)
echo "unsupported cuda version. edit build.sh"
echo "No CUDA architecture list exists for CUDA v${cuda_compiler_version}. See build.sh for information on adding one."
exit 1
esac
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
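The upstream-recommended alternative would look roughly like this (a hedged sketch, not what the recipe currently does; several commits in this PR tried and then reverted variations of it):

# Hypothetical: hand the arch list to CMake directly instead of via the environment.
export CMAKE_ARGS="${CMAKE_ARGS} -DTORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}"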
@@ -204,15 +238,16 @@ case ${PKG_NAME} in

mv build/lib.*/torch/bin/* ${PREFIX}/bin/
mv build/lib.*/torch/lib/* ${PREFIX}/lib/
mv build/lib.*/torch/share/* ${PREFIX}/share/
# Need to merge (rather than mv) these now: with system pybind11 the destination
# directory already exists and is non-empty, so mv would refuse to overwrite it.
rsync -a build/lib.*/torch/share/* ${PREFIX}/share/
mv build/lib.*/torch/include/{ATen,caffe2,tensorpipe,torch,c10} ${PREFIX}/include/
rm ${PREFIX}/lib/libtorch_python.*

# Keep the original backed up to sed later
cp build/CMakeCache.txt build/CMakeCache.txt.orig
;;
pytorch)
$PREFIX/bin/python -m pip install . --no-deps -vvv --no-clean \
$PREFIX/bin/python -m pip install . --no-deps --no-build-isolation -vvv --no-clean \
Member: Should we sync these flags across calls and Windows? Seems they vary a bit. Idk whether it is worth it, but we could also consider using script_env to set most of these flags once and reuse them throughout.

Contributor Author: For Windows we've got

%PYTHON% -m pip %PIP_ACTION% . --no-build-isolation --no-deps -vvv --no-clean

and for Unix

$PREFIX/bin/python -m pip install . --no-deps --no-build-isolation -vvv --no-clean

So the same AFAICS? Keeping them here seems more in line with what people will expect, given other feedstocks?

Contributor Author: any further comments?
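The script_env idea from the thread could look roughly like this on the unix side (a hypothetical sketch; PIP_FLAGS is an invented name, not used by the recipe):

# Define the shared pip flags once, then reuse them in every pip call.
PIP_FLAGS="--no-deps --no-build-isolation -vvv --no-clean"
$PREFIX/bin/python -m pip install . ${PIP_FLAGS}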

| sed "s,${CXX},\$\{CXX\},g" \
| sed "s,${PREFIX},\$\{PREFIX\},g"
# Keep this in ${PREFIX}/lib so that the library can be found by
47 changes: 45 additions & 2 deletions recipe/meta.yaml
@@ -2,6 +2,9 @@
{% set version = "2.5.1" %}
{% set build = 9 %}

# Use a higher build number for the CUDA variant, to ensure that it's
# preferred by conda's solver and preferentially installed on platforms
# that support it.
{% if cuda_compiler_version != "None" %}
{% set build = build + 200 %}
{% endif %}
@@ -63,6 +66,7 @@ source:
- patches/0014-Fix-FindOpenBLAS.patch
# backport https://github.com/pytorch/pytorch/pull/138095
- patches/0015-CD-Enable-Python-3.13-on-windows-138095.patch
- patches/0016-use-prefix-include-for-inductor.patch

build:
number: {{ build }}
@@ -166,6 +170,8 @@ requirements:
- libuv
- pkg-config # [unix]
- typing_extensions
- pybind11
- eigen
run:
# GPU requirements without run_exports
- {{ pin_compatible('cudnn') }} # [cuda_compiler_version != "None"]
@@ -304,6 +310,8 @@ outputs:
- pkg-config # [unix]
- typing_extensions
- {{ pin_subpackage('libtorch', exact=True) }}
- pybind11
- eigen
run:
- llvm-openmp # [osx]
- intel-openmp {{ mkl }} # [win]
@@ -348,7 +356,8 @@ outputs:
- pydot
- pip
- expecttest
- xmlrunner
# unittest-xml-reporting supersedes xmlrunner
- unittest-xml-reporting
# Required by run_test.py
- pytest-flakefinder
- pytest-rerunfailures
@@ -365,13 +374,23 @@
# tools/ is needed to optimise test run
# as of pytorch=2.0.0, there is a bug when trying to run tests without the tools
- tools
- .ci/pytorch/smoke_test/smoke_test.py
commands:
# Run pip check so as to ensure that all pytorch packages are installed
# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/24
- pip check
- python -c "import torch; print(torch.__version__)"
- python -c "import torch; assert torch.backends.mkldnn.m.is_available()" # [x86 and cuda_compiler_version == "None"]
- python -c "import torch; torch.tensor(1).to('cpu').numpy(); print('numpy support enabled!!!')"
# We have had issues with openmp .dylibs being doubly loaded in certain cases. These two tests catch the (observed) issue
- python -c "import torch; import numpy"
- python -c "import numpy; import torch"
# distributed support is enabled by default on linux; for mac, we enable it manually in build.sh
- python -c "import torch; assert torch.distributed.is_available()" # [linux or osx]
- python -c "import torch; assert torch.backends.cuda.is_built()" # [(cuda_compiler_version != "None")]
- python -c "import torch; assert torch.backends.cudnn.is_available()" # [(cuda_compiler_version != "None")]
- python -c "import torch; assert torch.cuda.is_available()" # [(cuda_compiler_version != "None")]
- python -c "import torch; assert torch.backends.cudnn.enabled" # [(cuda_compiler_version != "None")]
# At conda-forge, we target versions of OSX that are too old for MPS support
# But if users install a newer version of OSX, they will have MPS support
# https://github.com/conda-forge/pytorch-cpu-feedstock/pull/123#issuecomment-1186355073
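For users on newer macOS, a quick manual check of MPS support (hypothetical, not part of the recipe's tests, since the targeted OSX versions are too old for MPS):

python -c "import torch; print(torch.backends.mps.is_built(), torch.backends.mps.is_available())"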
@@ -380,6 +399,29 @@ outputs:
# python-version-specific library (default location in SP_DIR symlinks back to this)
- test -f $PREFIX/lib/libtorch_python${SHLIB_EXT} # [unix]

# the smoke test script takes a bunch of env variables, defined below
- set MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(cuda_compiler_version != "None") and (win)]
- set MATRIX_GPU_ARCH_TYPE="cuda" # [(gpu_variant or "").startswith("cuda") and (win)]
- set MATRIX_GPU_ARCH_VERSION="none" # [(cuda_compiler_version == "None") and (win)]
- set MATRIX_GPU_ARCH_TYPE="none" # [(cuda_compiler_version == "None") and (win)]
- set MATRIX_CHANNEL="defaults" # [win]
- set MATRIX_STABLE_VERSION={{ version }} # [win]
- set MATRIX_PACKAGE_TYPE="conda" # [win]
- set TARGET_OS="windows" # [win]
- set OMP_NUM_THREADS=4 # [win]
- export MATRIX_GPU_ARCH_VERSION="{{ '.'.join(cuda_compiler_version.split('.')[:2]) }}" # [(cuda_compiler_version != "None") and (linux and x86_64)]
- export MATRIX_GPU_ARCH_TYPE="cuda" # [(cuda_compiler_version != "None") and (linux and x86_64)]
- export MATRIX_GPU_ARCH_VERSION="none" # [(cuda_compiler_version == "None") and (not win)]
- export MATRIX_GPU_ARCH_TYPE="none" # [(cuda_compiler_version == "None") and (not win)]
- export MATRIX_CHANNEL="defaults" # [not win]
- export MATRIX_STABLE_VERSION="{{ version }}" # [not win]
- export MATRIX_PACKAGE_TYPE="conda" # [not win]
- export TARGET_OS="linux" # [linux]
- export TARGET_OS="macos-arm64" # [(osx and arm64)]
- export TARGET_OS="macos-x86_64" # [(osx and x86_64)]
- export OMP_NUM_THREADS=4 # [not win]
- python ./smoke_test/smoke_test.py --package torchonly
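The MATRIX_GPU_ARCH_VERSION jinja expression above just truncates the CUDA version to major.minor; the shell equivalent would be (hypothetical illustration):

cuda_compiler_version="12.6"                      # e.g. "12.6" or "12.6.3"
echo "${cuda_compiler_version}" | cut -d. -f1-2   # -> 12.6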

# a reasonably safe subset of tests that should run under 15 minutes
# disable hypothesis because it randomly yields health check errors
{% set tests = " ".join([
@@ -392,6 +434,8 @@
"test/test_nn.py",
"test/test_torch.py",
"test/test_xnnpack_integration.py",
# The inductor tests test torch.compile
"test/inductor/test_torchinductor.py", # [cuda_compiler_version != "None"]
"-m \"not hypothesis\"",
]) %}

@@ -476,7 +520,6 @@ about:
license_file:
- LICENSE
- NOTICE
- third_party/pybind11/LICENSE
summary: PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.

extra:
16 changes: 16 additions & 0 deletions recipe/patches/0016-use-prefix-include-for-inductor.patch
@@ -0,0 +1,16 @@
Currently inductor doesn't look in conda's include and lib directories. This results in errors when it tries to compile, if system versions of dependencies (e.g., sleef) are being used.

author: [email protected]

Index: pytorch/torch/_inductor/cpp_builder.py
===================================================================
--- pytorch.orig/torch/_inductor/cpp_builder.py 2024-12-16 15:16:47.074821258 -0600
+++ pytorch/torch/_inductor/cpp_builder.py 2024-12-16 15:17:33.922130106 -0600
@@ -1055,6 +1055,7 @@
+ python_include_dirs
+ torch_include_dirs
+ omp_include_dir_paths
+ + [os.getenv('CONDA_PREFIX') + '/include']
Member: Glancing through the source code, it looks like inductor accepts an include_dirs flag, which comes from a JSON config file. Could we just add our own JSON config file? Seems this may be needed in other contexts as well.

Contributor Author: Inductor takes the compile_flags.json file for its AOT mode (handled in package.py), but not for its JIT mode. That's the problem this patch is solving: making it look in prefix/include during JIT (torch.compile) compilation. Probably in AOT mode the user wants to specify their own compile flags for their platform, which is what this json file is for.

The code you're looking at is where the AOT code initializes the base class (BuildOptionsBase). However, we want to initialize the include directories in the child CppTorchOptions class, which is instantiated in cpu_vec_isa.py. It was a while ago that I wrote this patch, but IIRC that was the path in the stack trace.

Contributor Author: does this check out ok for you?

)
cflags = sys_libs_cflags + omp_cflags
ldflags = omp_ldflags
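Since the patch targets inductor's JIT path, a minimal end-to-end exercise of that path is a torch.compile call on CPU (a hedged sketch of the kind of check the test additions in this PR cover, not taken verbatim from it):

# JIT-compile a trivial function through inductor's cpp_builder and run it.
python -c "import torch; f = torch.compile(lambda x: x * 2 + 1); print(f(torch.arange(4)))"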