intel
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/clang/test/Driver/sycl-cuda-arch-macro.cpp b/clang/test/Driver/sycl-cuda-arch-macro.cpp
Lines changed: 4 additions & 0 deletions
diff --git a/clang/test/Preprocessor/sycl-macro.cpp b/clang/test/Preprocessor/sycl-macro.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
Lines changed: 5 additions & 7 deletions
diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md
Lines changed: 64 additions & 10 deletions
diff --git a/sycl/include/sycl/detail/helpers.hpp b/sycl/include/sycl/detail/helpers.hpp
Lines changed: 2 additions & 2 deletions
diff --git a/sycl/include/sycl/ext/oneapi/experimental/invoke_simd.hpp b/sycl/include/sycl/ext/oneapi/experimental/invoke_simd.hpp
Lines changed: 26 additions & 24 deletions
diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp
Lines changed: 46 additions & 41 deletions
@@ -309,7 +309,6 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                         Twine("\"") + Twine(CanonName) + Twine("\""));
     Builder.defineMacro("__amdgcn_target_id__",
                         Twine("\"") + Twine(*getTargetID()) + Twine("\""));
-    Builder.defineMacro("__CUDA_ARCH__", "0");
     for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
       auto Loc = OffloadArchFeatures.find(F);
       if (Loc != OffloadArchFeatures.end()) {
 
@@ -1491,9 +1491,10 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   }
 
   // CUDA device path compilaton
-  if (LangOpts.CUDAIsDevice && !LangOpts.HIP) {
+  if (LangOpts.CUDAIsDevice && !LangOpts.HIP && !LangOpts.isSYCL()) {
     // The CUDA_ARCH value is set for the GPU target specified in the NVPTX
     // backend's target defines.
+    // Note: SYCL targeting nvptx-cuda relies on __SYCL_CUDA_ARCH__ instead.
     Builder.defineMacro("__CUDA_ARCH__");
   }
 
 
@@ -1,9 +1,13 @@
 // Verify the __CUDA_ARCH__ macro has not been defined when offloading SYCL on NVPTX
 // RUN: %clangxx -E -dM -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -nocudalib -fno-sycl-libspirv %s 2>&1 \
 // RUN: | FileCheck --check-prefix=CHECK-CUDA-ARCH-MACRO %s
+// Verify the __CUDA_ARCH__ macro has not been defined when offloading SYCL on AMDGPU
+// RUN: %clangxx -E -dM -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx90a -nogpulib -fno-sycl-libspirv %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-CUDA-ARCH-MACRO %s
 // CHECK-CUDA-ARCH-MACRO-NOT: #define __CUDA_ARCH__ {{[0-9]+}}
 
 // Verify that '-fcuda-is-device' is not supplied when offloading SYCL on NVPTX
+// NOTE: AMDGPU targets, i.e. "amdgcn-amd-amdhsa", may rely on "-fcuda-is-device".
 // RUN: %clangxx -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -nocudalib -fno-sycl-libspirv %s 2>&1 \
 // RUN: | FileCheck --check-prefix=CHECK-CUDA-IS-DEVICE %s
 // CHECK-CUDA-IS-DEVICE: clang{{.*}} "-cc1" "-triple" "nvptx64-nvidia-cuda"
 
@@ -36,6 +36,6 @@
 // CHECK-CUDA:#define __SYCL_CUDA_ARCH__ [[ARCH_CODE]]
 // CHECK-CUDA-NOT:#define __CUDA_ARCH__ {{[0-9]+}}
 
-// CHECK-HIP:#define __CUDA_ARCH__ 0
+// CHECK-HIP-NOT:#define __CUDA_ARCH__ {{[0-9]+}}
 
 // CHECK-CUDA-FEATURE:#define __CUDA_ARCH_FEAT_SM90_ALL 1
@@ -117,13 +117,11 @@ if(SYCL_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 80fdb0261d4a3623b05d1795f2f59ace9f718d76
-  # Merge: 568a96aa 77b6c4c7
-  # Author: aarongreig <[email protected]>
-  # Date:   Wed Oct 16 13:53:51 2024 +0100
-  #     Merge pull request #2206 from oneapi-src/revert-2138-counter-based-3
-  #     Revert "[L0] Phase 2 of Counter-Based Event Implementation"
-  set(UNIFIED_RUNTIME_TAG 80fdb0261d4a3623b05d1795f2f59ace9f718d76)
+  # commit af7e275b509b41f54a66743ebf748dfb51668abf
+  # Author: Maosu Zhao <[email protected]>
+  # Date:   Thu Oct 17 16:31:21 2024 +0800
+  #     [DeviceSanitizer] Refactor the code to manage shadow memory (#2127)
+  set(UNIFIED_RUNTIME_TAG af7e275b509b41f54a66743ebf748dfb51668abf)
 
   set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
   # Due to the use of dependentloadflag and no installer for UMF and hwloc we need
 
@@ -1048,6 +1048,10 @@ static inline unsigned int get_device_id(const sycl::device &dev);
 // Util function to get the number of available devices
 static inline unsigned int device_count();
 
+// Throws a sycl::exception if the device lacks any of the given aspects.
+static inline void
+has_capability_or_fail(const sycl::device &dev,
+                       const std::initializer_list<sycl::aspect> &props);
 } // syclcompat
 ```
 
@@ -1725,7 +1729,51 @@ second operand, respectively. These three APIs return a single 32-bit value with
 the accumulated result, which is unsigned if both operands are `uint32_t` and
 signed otherwise.
 
+Various maths functions are defined to operate on any floating point type.
+`syclcompat::is_floating_point_v` extends the standard library's
+`std::is_floating_point_v` to include `sycl::half` and, where available,
+`sycl::ext::oneapi::bfloat16`. The current version of SYCLcompat also provides
+a specialization of `std::common_type_t` for `sycl::ext::oneapi::bfloat16`,
+though this will be moved to the `sycl_ext_oneapi_bfloat16` extension in the
+future.
+
+```cpp
+namespace std {
+template <> struct common_type<sycl::ext::oneapi::bfloat16> {
+  using type = sycl::ext::oneapi::bfloat16;
+};
+
+template <>
+struct common_type<sycl::ext::oneapi::bfloat16, sycl::ext::oneapi::bfloat16> {
+  using type = sycl::ext::oneapi::bfloat16;
+};
+
+template <typename T> struct common_type<sycl::ext::oneapi::bfloat16, T> {
+  using type = sycl::ext::oneapi::bfloat16;
+};
+
+template <typename T> struct common_type<T, sycl::ext::oneapi::bfloat16> {
+  using type = sycl::ext::oneapi::bfloat16;
+};
+} // namespace std
+```
+
 ```cpp
+namespace syclcompat{
+
+// Trait for extended floating point definition
+template <typename T>
+struct is_floating_point : std::is_floating_point<T>{};
+
+template <> struct is_floating_point<sycl::half> : std::true_type {};
+
+#ifdef SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS
+template <> struct is_floating_point<sycl::ext::oneapi::bfloat16> : std::true_type {};
+#endif
+template <typename T>
+
+inline constexpr bool is_floating_point_v = is_floating_point<T>::value;
+
 inline unsigned int funnelshift_l(unsigned int low, unsigned int high,
                                   unsigned int shift); 
 
@@ -1752,11 +1800,9 @@ inline std::enable_if_t<ValueT::size() == 2, ValueT> isnan(const ValueT a);
 // cbrt function wrapper.
 template <typename ValueT>
 inline std::enable_if_t<std::is_floating_point_v<ValueT> ||
-                            std::is_same_v<sycl::half, ValueT>,
+                            std::is_same_v<ValueT, sycl::half>,
                         ValueT>
-cbrt(ValueT val) {
-  return sycl::cbrt(static_cast<ValueT>(val));
-}
+cbrt(ValueT val);
 
 // For floating-point types, `float` or `double` arguments are acceptable.
 // For integer types, `std::uint32_t`, `std::int32_t`, `std::uint64_t` or
@@ -1794,6 +1840,10 @@ template <typename ValueT, typename ValueU>
 inline sycl::vec<std::common_type_t<ValueT, ValueU>, 2>
 fmax_nan(const sycl::vec<ValueT, 2> a, const sycl::vec<ValueU, 2> b);
 
+template <typename ValueT, typename ValueU>
+inline sycl::marray<std::common_type_t<ValueT, ValueU>, 2>
+fmax_nan(const sycl::marray<ValueT, 2> a, const sycl::marray<ValueU, 2> b);
+
 // Performs 2 elements comparison and returns the smaller one. If either of
 // inputs is NaN, then return NaN.
 template <typename ValueT, typename ValueU>
@@ -1803,6 +1853,10 @@ template <typename ValueT, typename ValueU>
 inline sycl::vec<std::common_type_t<ValueT, ValueU>, 2>
 fmin_nan(const sycl::vec<ValueT, 2> a, const sycl::vec<ValueU, 2> b);
 
+template <typename ValueT, typename ValueU>
+inline sycl::marray<std::common_type_t<ValueT, ValueU>, 2>
+fmin_nan(const sycl::marray<ValueT, 2> a, const sycl::marray<ValueU, 2> b);
+
 inline float pow(const float a, const int b) { return sycl::pown(a, b); }
 inline double pow(const double a, const int b) { return sycl::pown(a, b); }
 
@@ -1863,14 +1917,13 @@ unordered_compare_both(const ValueT a, const ValueT b,
                        const BinaryOperation binary_op);
 
 template <typename ValueT, class BinaryOperation>
-inline unsigned compare_mask(const sycl::vec<ValueT, 2> a,
-                             const sycl::vec<ValueT, 2> b,
-                             const BinaryOperation binary_op);
+inline std::enable_if_t<ValueT::size() == 2, unsigned>
+compare_mask(const ValueT a, const ValueT b, const BinaryOperation binary_op);
 
 template <typename ValueT, class BinaryOperation>
-inline unsigned unordered_compare_mask(const sycl::vec<ValueT, 2> a,
-                                       const sycl::vec<ValueT, 2> b,
-                                       const BinaryOperation binary_op);
+inline std::enable_if_t<ValueT::size() == 2, unsigned>
+unordered_compare_mask(const ValueT a, const ValueT b,
+                       const BinaryOperation binary_op);
 
 template <typename S, typename T> inline T vectorized_max(T a, T b);
 
@@ -1924,6 +1977,7 @@ inline dot_product_acc_t<T1, T2> dp2a_hi(T1 a, T2 b,
 template <typename T1, typename T2>
 inline dot_product_acc_t<T1, T2> dp4a(T1 a, T2 b,
                                       dot_product_acc_t<T1, T2> c);
+} // namespace syclcompat
 ```
 
 `vectorized_binary` computes the `BinaryOperation` for two operands,
 
@@ -240,11 +240,11 @@ getSPIRVMemorySemanticsMask(const access::fence_space AccessSpace,
 
 // To ensure loop unrolling is done when processing dimensions.
 template <size_t... Inds, class F>
-void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
+constexpr void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
   (f(std::integral_constant<size_t, Inds>{}), ...);
 }
 
-template <size_t count, class F> void loop(F &&f) {
+template <size_t count, class F> constexpr void loop(F &&f) {
   loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
 }
 inline constexpr bool is_power_of_two(int x) { return (x & (x - 1)) == 0; }
 
@@ -14,7 +14,6 @@
 #include <sycl/ext/oneapi/experimental/detail/invoke_simd_types.hpp>
 #include <sycl/ext/oneapi/experimental/uniform.hpp>
 
-#include <sycl/detail/boost/mp11.hpp>
 #include <sycl/sub_group.hpp>
 
 #include <functional>
@@ -71,8 +70,6 @@ namespace ext::oneapi::experimental {
 // --- Helpers
 namespace detail {
 
-namespace __MP11_NS = sycl::detail::boost::mp11;
-
 // This structure performs the SPMD-to-SIMD parameter type conversion as defined
 // by the spec.
 template <class T, int N, class = void> struct spmd2simd;
@@ -154,8 +151,7 @@ struct is_simd_or_mask_type<simd_mask<T, N>> : std::true_type {};
 // Checks if all the types in the parameter pack are uniform<T>.
 template <class... SpmdArgs> struct all_uniform_types {
   constexpr operator bool() {
-    using TypeList = __MP11_NS::mp_list<SpmdArgs...>;
-    return __MP11_NS::mp_all_of<TypeList, is_uniform_type>::value;
+    return ((is_uniform_type<SpmdArgs>::value && ...));
   }
 };
 
@@ -193,26 +189,32 @@ constexpr void verify_return_type_matches_sg_size() {
 // as prescribed by the spec assuming this subgroup size. One and only one
 // subgroup size should conform.
 template <class SimdCallable, class... SpmdArgs> struct sg_size {
-  template <class N>
-  using IsInvocableSgSize = __MP11_NS::mp_bool<std::is_invocable_v<
-      SimdCallable, typename spmd2simd<SpmdArgs, N::value>::type...>>;
-
   __DPCPP_SYCL_EXTERNAL constexpr operator int() {
-    using SupportedSgSizes = __MP11_NS::mp_list_c<int, 1, 2, 4, 8, 16, 32>;
-    using InvocableSgSizes =
-        __MP11_NS::mp_copy_if<SupportedSgSizes, IsInvocableSgSize>;
-    constexpr auto found_invoke_simd_target =
-        __MP11_NS::mp_empty<InvocableSgSizes>::value != 1;
-    if constexpr (found_invoke_simd_target) {
-      static_assert((__MP11_NS::mp_size<InvocableSgSizes>::value == 1) &&
-                    "multiple invoke_simd targets found");
-      return __MP11_NS::mp_front<InvocableSgSizes>::value;
-    }
-    static_assert(
-        found_invoke_simd_target,
-        "No callable invoke_simd target found. Confirm the "
-        "invoke_simd invocation argument types are convertible to the "
-        "invoke_simd target argument types");
+    constexpr auto x = []() constexpr {
+      constexpr int supported_sg_sizes[] = {1, 2, 4, 8, 16, 32};
+      int num_found = 0;
+      int found_sg_size = 0;
+      sycl::detail::loop<std::size(supported_sg_sizes)>([&](auto idx) {
+        constexpr auto sg_size = supported_sg_sizes[idx];
+        if (std::is_invocable_v<
+                SimdCallable, typename spmd2simd<SpmdArgs, sg_size>::type...>) {
+          ++num_found;
+          found_sg_size = sg_size;
+        }
+      });
+      return std::pair{num_found, found_sg_size};
+    }();
+
+    constexpr auto num_found = x.first;
+    constexpr auto found_sg_size = x.second;
+
+    static_assert(num_found != 0,
+                  "No callable invoke_simd target found. Confirm the "
+                  "invoke_simd invocation argument types are convertible to "
+                  "the invoke_simd target argument types");
+    static_assert(num_found == 1, "Multiple invoke_simd targets found!");
+
+    return found_sg_size;
   }
 };
 
 
@@ -334,6 +334,51 @@ static int get_minor_version(const sycl::device &dev) {
   return minor;
 }
 
+static inline void
+has_capability_or_fail(const sycl::device &dev,
+                       const std::initializer_list<sycl::aspect> &props) {
+  for (const auto &it : props) {
+    if (dev.has(it))
+      continue;
+    switch (it) {
+    case sycl::aspect::fp64:
+      throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+                            "[SYCLcompat] 'double' is not supported in '" +
+                                dev.get_info<sycl::info::device::name>() +
+                                "' device");
+      break;
+    case sycl::aspect::fp16:
+      throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+                            "[SYCLcompat] 'half' is not supported in '" +
+                                dev.get_info<sycl::info::device::name>() +
+                                "' device");
+      break;
+    default:
+#define __SYCL_ASPECT(ASPECT, ID)                                              \
+  case sycl::aspect::ASPECT:                                                   \
+    return #ASPECT;
+#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
+#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
+      auto getAspectNameStr = [](sycl::aspect AspectNum) -> std::string {
+        switch (AspectNum) {
+#include <sycl/info/aspects.def>
+#include <sycl/info/aspects_deprecated.def>
+        default:
+          return "unknown aspect";
+        }
+      };
+#undef __SYCL_ASPECT_DEPRECATED_ALIAS
+#undef __SYCL_ASPECT_DEPRECATED
+#undef __SYCL_ASPECT
+      throw sycl::exception(
+          sycl::make_error_code(sycl::errc::runtime),
+          "[SYCLcompat] '" + getAspectNameStr(it) + "' is not supported in '" +
+              dev.get_info<sycl::info::device::name>() + "' device");
+    }
+    break;
+  }
+}
+
 /// device extension
 class device_ext : public sycl::device {
 public:
@@ -613,47 +658,7 @@ Use 64 bits as memory_bus_width default value."
   /// sycl::aspect.
   void has_capability_or_fail(
       const std::initializer_list<sycl::aspect> &props) const {
-    for (const auto &it : props) {
-      if (has(it))
-        continue;
-      switch (it) {
-      case sycl::aspect::fp64:
-        throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
-                              "[SYCLcompat] 'double' is not supported in '" +
-                                  get_info<sycl::info::device::name>() +
-                                  "' device");
-        break;
-      case sycl::aspect::fp16:
-        throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
-                              "[SYCLcompat] 'half' is not supported in '" +
-                                  get_info<sycl::info::device::name>() +
-                                  "' device");
-        break;
-      default:
-#define __SYCL_ASPECT(ASPECT, ID)                                              \
-  case sycl::aspect::ASPECT:                                                   \
-    return #ASPECT;
-#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
-#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
-        auto getAspectNameStr = [](sycl::aspect AspectNum) -> std::string {
-          switch (AspectNum) {
-#include <sycl/info/aspects.def>
-#include <sycl/info/aspects_deprecated.def>
-          default:
-            return "unknown aspect";
-          }
-        };
-#undef __SYCL_ASPECT_DEPRECATED_ALIAS
-#undef __SYCL_ASPECT_DEPRECATED
-#undef __SYCL_ASPECT
-        throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
-                              "[SYCLcompat] '" + getAspectNameStr(it) +
-                                  "' is not supported in '" +
-                                  get_info<sycl::info::device::name>() +
-                                  "' device");
-      }
-      break;
-    }
+    ::syclcompat::has_capability_or_fail(*this, props);
   }
 
 private:
Original file line number	Diff line number	Diff line change
`@@ -1491,9 +1491,10 @@ static void InitializePredefinedMacros(const TargetInfo &TI,`
`1491`	`1491`	`}`
`1492`	`1492`
`1493`	`1493`	`// CUDA device path compilaton`
`1494`		`- if (LangOpts.CUDAIsDevice && !LangOpts.HIP) {`
	`1494`	`+ if (LangOpts.CUDAIsDevice && !LangOpts.HIP && !LangOpts.isSYCL()) {`
`1495`	`1495`	`// The CUDA_ARCH value is set for the GPU target specified in the NVPTX`
`1496`	`1496`	`// backend's target defines.`
	`1497`	`+ // Note: SYCL targeting nvptx-cuda relies on __SYCL_CUDA_ARCH__ instead.`
`1497`	`1498`	`Builder.defineMacro("__CUDA_ARCH__");`
`1498`	`1499`	`}`
`1499`	`1500`
Original file line number	Diff line number	Diff line change
`@@ -240,11 +240,11 @@ getSPIRVMemorySemanticsMask(const access::fence_space AccessSpace,`
`240`	`240`
`241`	`241`	`// To ensure loop unrolling is done when processing dimensions.`
`242`	`242`	`template <size_t... Inds, class F>`
`243`		`-void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {`
	`243`	`+constexpr void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {`
`244`	`244`	`(f(std::integral_constant<size_t, Inds>{}), ...);`
`245`	`245`	`}`
`246`	`246`
`247`		`-template <size_t count, class F> void loop(F &&f) {`
	`247`	`+template <size_t count, class F> constexpr void loop(F &&f) {`
`248`	`248`	`loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));`
`249`	`249`	`}`
`250`	`250`	`inline constexpr bool is_power_of_two(int x) { return (x & (x - 1)) == 0; }`