diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index ee800549518..75a95d0522b 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -59d5cf083b4f860dea76fe8936076177f9367f10 +01f1cc44cbbfdf6307aa01b803a4ee22f9ade946 diff --git a/backends/xnnpack/cmake/Dependencies.cmake b/backends/xnnpack/cmake/Dependencies.cmake index fef63badf23..64d2409fb61 100644 --- a/backends/xnnpack/cmake/Dependencies.cmake +++ b/backends/xnnpack/cmake/Dependencies.cmake @@ -35,7 +35,11 @@ set(XNNPACK_BUILD_TESTS set(XNNPACK_ENABLE_AVXVNNI OFF CACHE BOOL "" -) + ) +# Work around observed failure: https://github.com/pytorch/executorch/pull/10362#issuecomment-2906391232 +set(XNNPACK_ENABLE_AVX512VNNIGFNI + OFF + CACHE BOOL "") if(EXECUTORCH_XNNPACK_ENABLE_KLEIDI) set(XNNPACK_ENABLE_KLEIDIAI diff --git a/install_requirements.py b/install_requirements.py index 567dca4ebf1..2fcd65ea338 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -71,7 +71,7 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250524" +NIGHTLY_VERSION = "dev20250422" def install_requirements(use_pytorch_nightly): diff --git a/runtime/core/portable_type/c10/c10/macros/Macros.h b/runtime/core/portable_type/c10/c10/macros/Macros.h index 919eb6c8567..7e61ad7e26b 100644 --- a/runtime/core/portable_type/c10/c10/macros/Macros.h +++ b/runtime/core/portable_type/c10/c10/macros/Macros.h @@ -508,4 +508,14 @@ __host__ __device__ #endif +// This macro is used to find older C++ compilers +// that don't support move optimization for return values. 
+ +#if (defined(__GNUC__) && __GNUC__ < 13) || \ + (defined(__clang_major__) && __clang_major__ < 13) +#define C10_RETURN_MOVE_IF_OLD_COMPILER 1 +#else +#define C10_RETURN_MOVE_IF_OLD_COMPILER 0 +#endif + #endif // C10_MACROS_MACROS_H_ diff --git a/runtime/core/portable_type/c10/c10/util/BFloat16.h b/runtime/core/portable_type/c10/c10/util/BFloat16.h index 09d3051ab71..93d0ec54fb0 100644 --- a/runtime/core/portable_type/c10/c10/util/BFloat16.h +++ b/runtime/core/portable_type/c10/c10/util/BFloat16.h @@ -31,7 +31,7 @@ inline C10_HOST_DEVICE float f32_from_bits(uint16_t src) { uint32_t tmp = src; tmp <<= 16; -#if defined(USE_ROCM) +#if defined(USE_ROCM) && defined(__HIPCC__) float* tempRes; // We should be using memcpy in order to respect the strict aliasing rule @@ -48,7 +48,7 @@ inline C10_HOST_DEVICE float f32_from_bits(uint16_t src) { inline C10_HOST_DEVICE uint16_t bits_from_f32(float src) { uint32_t res = 0; -#if defined(USE_ROCM) +#if defined(USE_ROCM) && defined(__HIPCC__) // We should be using memcpy in order to respect the strict aliasing rule // but it fails in the HIP environment. uint32_t* tempRes = reinterpret_cast<uint32_t*>(&src); @@ -61,7 +61,7 @@ inline C10_HOST_DEVICE uint16_t bits_from_f32(float src) { } inline C10_HOST_DEVICE uint16_t round_to_nearest_even(float src) { -#if defined(USE_ROCM) +#if defined(USE_ROCM) && defined(__HIPCC__) if (src != src) { #elif defined(_MSC_VER) if (isnan(src)) { @@ -87,7 +87,7 @@ struct alignas(2) BFloat16 { uint16_t x; // HIP wants __host__ __device__ tag, CUDA does not -#if defined(USE_ROCM) +#if defined(USE_ROCM) && defined(__HIPCC__) C10_HOST_DEVICE BFloat16() = default; #else BFloat16() = default;