Skip to content

Commit 24f87b0

Browse files
authored
Merge branch 'intel:sycl' into work_group_memoy_new
2 parents fd89473 + 8ef14d5 commit 24f87b0

File tree

21 files changed

+596
-246
lines changed

21 files changed

+596
-246
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,6 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
309309
Twine("\"") + Twine(CanonName) + Twine("\""));
310310
Builder.defineMacro("__amdgcn_target_id__",
311311
Twine("\"") + Twine(*getTargetID()) + Twine("\""));
312-
Builder.defineMacro("__CUDA_ARCH__", "0");
313312
for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
314313
auto Loc = OffloadArchFeatures.find(F);
315314
if (Loc != OffloadArchFeatures.end()) {

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1491,9 +1491,10 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
14911491
}
14921492

14931493
// CUDA device path compilaton
1494-
if (LangOpts.CUDAIsDevice && !LangOpts.HIP) {
1494+
if (LangOpts.CUDAIsDevice && !LangOpts.HIP && !LangOpts.isSYCL()) {
14951495
// The CUDA_ARCH value is set for the GPU target specified in the NVPTX
14961496
// backend's target defines.
1497+
// Note: SYCL targeting nvptx-cuda relies on __SYCL_CUDA_ARCH__ instead.
14971498
Builder.defineMacro("__CUDA_ARCH__");
14981499
}
14991500

clang/test/Driver/sycl-cuda-arch-macro.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
// Verify the __CUDA_ARCH__ macro has not been defined when offloading SYCL on NVPTX
22
// RUN: %clangxx -E -dM -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -nocudalib -fno-sycl-libspirv %s 2>&1 \
33
// RUN: | FileCheck --check-prefix=CHECK-CUDA-ARCH-MACRO %s
4+
// Verify the __CUDA_ARCH__ macro has not been defined when offloading SYCL on AMDGPU
5+
// RUN: %clangxx -E -dM -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx90a -nogpulib -fno-sycl-libspirv %s 2>&1 \
6+
// RUN: | FileCheck --check-prefix=CHECK-CUDA-ARCH-MACRO %s
47
// CHECK-CUDA-ARCH-MACRO-NOT: #define __CUDA_ARCH__ {{[0-9]+}}
58

69
// Verify that '-fcuda-is-device' is not supplied when offloading SYCL on NVPTX
10+
// NOTE: AMDGPU targets, i.e. "amdgcn-amd-amdhsa" may rely on "fcuda-is-device"
711
// RUN: %clangxx -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -nocudalib -fno-sycl-libspirv %s 2>&1 \
812
// RUN: | FileCheck --check-prefix=CHECK-CUDA-IS-DEVICE %s
913
// CHECK-CUDA-IS-DEVICE: clang{{.*}} "-cc1" "-triple" "nvptx64-nvidia-cuda"

clang/test/Preprocessor/sycl-macro.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@
3636
// CHECK-CUDA:#define __SYCL_CUDA_ARCH__ [[ARCH_CODE]]
3737
// CHECK-CUDA-NOT:#define __CUDA_ARCH__ {{[0-9]+}}
3838

39-
// CHECK-HIP:#define __CUDA_ARCH__ 0
39+
// CHECK-HIP-NOT:#define __CUDA_ARCH__ {{[0-9]+}}
4040

4141
// CHECK-CUDA-FEATURE:#define __CUDA_ARCH_FEAT_SM90_ALL 1

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,11 @@ if(SYCL_UR_USE_FETCH_CONTENT)
117117
endfunction()
118118

119119
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120-
# commit 80fdb0261d4a3623b05d1795f2f59ace9f718d76
121-
# Merge: 568a96aa 77b6c4c7
122-
# Author: aarongreig <[email protected]>
123-
# Date: Wed Oct 16 13:53:51 2024 +0100
124-
# Merge pull request #2206 from oneapi-src/revert-2138-counter-based-3
125-
# Revert "[L0] Phase 2 of Counter-Based Event Implementation"
126-
set(UNIFIED_RUNTIME_TAG 80fdb0261d4a3623b05d1795f2f59ace9f718d76)
120+
# commit af7e275b509b41f54a66743ebf748dfb51668abf
121+
# Author: Maosu Zhao <[email protected]>
122+
# Date: Thu Oct 17 16:31:21 2024 +0800
123+
# [DeviceSanitizer] Refactor the code to manage shadow memory (#2127)
124+
set(UNIFIED_RUNTIME_TAG af7e275b509b41f54a66743ebf748dfb51668abf)
127125

128126
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
129127
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/doc/syclcompat/README.md

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,6 +1048,10 @@ static inline unsigned int get_device_id(const sycl::device &dev);
10481048
// Util function to get the number of available devices
10491049
static inline unsigned int device_count();
10501050
1051+
// Util function to check whether a device supports some kinds of sycl::aspect.
1052+
static inline void
1053+
has_capability_or_fail(const sycl::device &dev,
1054+
const std::initializer_list<sycl::aspect> &props);
10511055
} // syclcompat
10521056
```
10531057

@@ -1725,7 +1729,51 @@ second operand, respectively. These three APIs return a single 32-bit value with
17251729
the accumulated result, which is unsigned if both operands are `uint32_t` and
17261730
signed otherwise.
17271731

1732+
Various maths functions are defined operate on any floating point types.
1733+
`syclcompat::is_floating_point_v` extends the standard library's
1734+
`std::is_floating_point_v` to include `sycl::half` and, where available,
1735+
`sycl::ext::oneapi::bfloat16`. The current version of SYCLcompat also provides
1736+
a specialization of `std::common_type_t` for `sycl::ext::oneapi::bfloat16`,
1737+
though this will be moved to the `sycl_ext_oneapi_bfloat16` extension in
1738+
future.
1739+
1740+
```cpp
1741+
namespace std {
1742+
template <> struct common_type<sycl::ext::oneapi::bfloat16> {
1743+
using type = sycl::ext::oneapi::bfloat16;
1744+
};
1745+
1746+
template <>
1747+
struct common_type<sycl::ext::oneapi::bfloat16, sycl::ext::oneapi::bfloat16> {
1748+
using type = sycl::ext::oneapi::bfloat16;
1749+
};
1750+
1751+
template <typename T> struct common_type<sycl::ext::oneapi::bfloat16, T> {
1752+
using type = sycl::ext::oneapi::bfloat16;
1753+
};
1754+
1755+
template <typename T> struct common_type<T, sycl::ext::oneapi::bfloat16> {
1756+
using type = sycl::ext::oneapi::bfloat16;
1757+
};
1758+
} // namespace std
1759+
```
1760+
17281761
```cpp
1762+
namespace syclcompat{
1763+
1764+
// Trait for extended floating point definition
1765+
template <typename T>
1766+
struct is_floating_point : std::is_floating_point<T>{};
1767+
1768+
template <> struct is_floating_point<sycl::half> : std::true_type {};
1769+
1770+
#ifdef SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS
1771+
template <> struct is_floating_point<sycl::ext::oneapi::bfloat16> : std::true_type {};
1772+
#endif
1773+
template <typename T>
1774+
1775+
inline constexpr bool is_floating_point_v = is_floating_point<T>::value;
1776+
17291777
inline unsigned int funnelshift_l(unsigned int low, unsigned int high,
17301778
unsigned int shift);
17311779
@@ -1752,11 +1800,9 @@ inline std::enable_if_t<ValueT::size() == 2, ValueT> isnan(const ValueT a);
17521800
// cbrt function wrapper.
17531801
template <typename ValueT>
17541802
inline std::enable_if_t<std::is_floating_point_v<ValueT> ||
1755-
std::is_same_v<sycl::half, ValueT>,
1803+
std::is_same_v<ValueT, sycl::half>,
17561804
ValueT>
1757-
cbrt(ValueT val) {
1758-
return sycl::cbrt(static_cast<ValueT>(val));
1759-
}
1805+
cbrt(ValueT val);
17601806
17611807
// For floating-point types, `float` or `double` arguments are acceptable.
17621808
// For integer types, `std::uint32_t`, `std::int32_t`, `std::uint64_t` or
@@ -1794,6 +1840,10 @@ template <typename ValueT, typename ValueU>
17941840
inline sycl::vec<std::common_type_t<ValueT, ValueU>, 2>
17951841
fmax_nan(const sycl::vec<ValueT, 2> a, const sycl::vec<ValueU, 2> b);
17961842
1843+
template <typename ValueT, typename ValueU>
1844+
inline sycl::marray<std::common_type_t<ValueT, ValueU>, 2>
1845+
fmax_nan(const sycl::marray<ValueT, 2> a, const sycl::marray<ValueU, 2> b);
1846+
17971847
// Performs 2 elements comparison and returns the smaller one. If either of
17981848
// inputs is NaN, then return NaN.
17991849
template <typename ValueT, typename ValueU>
@@ -1803,6 +1853,10 @@ template <typename ValueT, typename ValueU>
18031853
inline sycl::vec<std::common_type_t<ValueT, ValueU>, 2>
18041854
fmin_nan(const sycl::vec<ValueT, 2> a, const sycl::vec<ValueU, 2> b);
18051855
1856+
template <typename ValueT, typename ValueU>
1857+
inline sycl::marray<std::common_type_t<ValueT, ValueU>, 2>
1858+
fmin_nan(const sycl::marray<ValueT, 2> a, const sycl::marray<ValueU, 2> b);
1859+
18061860
inline float pow(const float a, const int b) { return sycl::pown(a, b); }
18071861
inline double pow(const double a, const int b) { return sycl::pown(a, b); }
18081862
@@ -1863,14 +1917,13 @@ unordered_compare_both(const ValueT a, const ValueT b,
18631917
const BinaryOperation binary_op);
18641918
18651919
template <typename ValueT, class BinaryOperation>
1866-
inline unsigned compare_mask(const sycl::vec<ValueT, 2> a,
1867-
const sycl::vec<ValueT, 2> b,
1868-
const BinaryOperation binary_op);
1920+
inline std::enable_if_t<ValueT::size() == 2, unsigned>
1921+
compare_mask(const ValueT a, const ValueT b, const BinaryOperation binary_op);
18691922
18701923
template <typename ValueT, class BinaryOperation>
1871-
inline unsigned unordered_compare_mask(const sycl::vec<ValueT, 2> a,
1872-
const sycl::vec<ValueT, 2> b,
1873-
const BinaryOperation binary_op);
1924+
inline std::enable_if_t<ValueT::size() == 2, unsigned>
1925+
unordered_compare_mask(const ValueT a, const ValueT b,
1926+
const BinaryOperation binary_op);
18741927
18751928
template <typename S, typename T> inline T vectorized_max(T a, T b);
18761929
@@ -1924,6 +1977,7 @@ inline dot_product_acc_t<T1, T2> dp2a_hi(T1 a, T2 b,
19241977
template <typename T1, typename T2>
19251978
inline dot_product_acc_t<T1, T2> dp4a(T1 a, T2 b,
19261979
dot_product_acc_t<T1, T2> c);
1980+
} // namespace syclcompat
19271981
```
19281982
19291983
`vectorized_binary` computes the `BinaryOperation` for two operands,

sycl/include/sycl/detail/helpers.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,11 +240,11 @@ getSPIRVMemorySemanticsMask(const access::fence_space AccessSpace,
240240

241241
// To ensure loop unrolling is done when processing dimensions.
242242
template <size_t... Inds, class F>
243-
void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
243+
constexpr void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
244244
(f(std::integral_constant<size_t, Inds>{}), ...);
245245
}
246246

247-
template <size_t count, class F> void loop(F &&f) {
247+
template <size_t count, class F> constexpr void loop(F &&f) {
248248
loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
249249
}
250250
inline constexpr bool is_power_of_two(int x) { return (x & (x - 1)) == 0; }

sycl/include/sycl/ext/oneapi/experimental/invoke_simd.hpp

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include <sycl/ext/oneapi/experimental/detail/invoke_simd_types.hpp>
1515
#include <sycl/ext/oneapi/experimental/uniform.hpp>
1616

17-
#include <sycl/detail/boost/mp11.hpp>
1817
#include <sycl/sub_group.hpp>
1918

2019
#include <functional>
@@ -71,8 +70,6 @@ namespace ext::oneapi::experimental {
7170
// --- Helpers
7271
namespace detail {
7372

74-
namespace __MP11_NS = sycl::detail::boost::mp11;
75-
7673
// This structure performs the SPMD-to-SIMD parameter type conversion as defined
7774
// by the spec.
7875
template <class T, int N, class = void> struct spmd2simd;
@@ -154,8 +151,7 @@ struct is_simd_or_mask_type<simd_mask<T, N>> : std::true_type {};
154151
// Checks if all the types in the parameter pack are uniform<T>.
155152
template <class... SpmdArgs> struct all_uniform_types {
156153
constexpr operator bool() {
157-
using TypeList = __MP11_NS::mp_list<SpmdArgs...>;
158-
return __MP11_NS::mp_all_of<TypeList, is_uniform_type>::value;
154+
return ((is_uniform_type<SpmdArgs>::value && ...));
159155
}
160156
};
161157

@@ -193,26 +189,32 @@ constexpr void verify_return_type_matches_sg_size() {
193189
// as prescribed by the spec assuming this subgroup size. One and only one
194190
// subgroup size should conform.
195191
template <class SimdCallable, class... SpmdArgs> struct sg_size {
196-
template <class N>
197-
using IsInvocableSgSize = __MP11_NS::mp_bool<std::is_invocable_v<
198-
SimdCallable, typename spmd2simd<SpmdArgs, N::value>::type...>>;
199-
200192
__DPCPP_SYCL_EXTERNAL constexpr operator int() {
201-
using SupportedSgSizes = __MP11_NS::mp_list_c<int, 1, 2, 4, 8, 16, 32>;
202-
using InvocableSgSizes =
203-
__MP11_NS::mp_copy_if<SupportedSgSizes, IsInvocableSgSize>;
204-
constexpr auto found_invoke_simd_target =
205-
__MP11_NS::mp_empty<InvocableSgSizes>::value != 1;
206-
if constexpr (found_invoke_simd_target) {
207-
static_assert((__MP11_NS::mp_size<InvocableSgSizes>::value == 1) &&
208-
"multiple invoke_simd targets found");
209-
return __MP11_NS::mp_front<InvocableSgSizes>::value;
210-
}
211-
static_assert(
212-
found_invoke_simd_target,
213-
"No callable invoke_simd target found. Confirm the "
214-
"invoke_simd invocation argument types are convertible to the "
215-
"invoke_simd target argument types");
193+
constexpr auto x = []() constexpr {
194+
constexpr int supported_sg_sizes[] = {1, 2, 4, 8, 16, 32};
195+
int num_found = 0;
196+
int found_sg_size = 0;
197+
sycl::detail::loop<std::size(supported_sg_sizes)>([&](auto idx) {
198+
constexpr auto sg_size = supported_sg_sizes[idx];
199+
if (std::is_invocable_v<
200+
SimdCallable, typename spmd2simd<SpmdArgs, sg_size>::type...>) {
201+
++num_found;
202+
found_sg_size = sg_size;
203+
}
204+
});
205+
return std::pair{num_found, found_sg_size};
206+
}();
207+
208+
constexpr auto num_found = x.first;
209+
constexpr auto found_sg_size = x.second;
210+
211+
static_assert(num_found != 0,
212+
"No callable invoke_simd target found. Confirm the "
213+
"invoke_simd invocation argument types are convertible to "
214+
"the invoke_simd target argument types");
215+
static_assert(num_found == 1, "Multiple invoke_simd targets found!");
216+
217+
return found_sg_size;
216218
}
217219
};
218220

sycl/include/syclcompat/device.hpp

Lines changed: 46 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,51 @@ static int get_minor_version(const sycl::device &dev) {
334334
return minor;
335335
}
336336

337+
static inline void
338+
has_capability_or_fail(const sycl::device &dev,
339+
const std::initializer_list<sycl::aspect> &props) {
340+
for (const auto &it : props) {
341+
if (dev.has(it))
342+
continue;
343+
switch (it) {
344+
case sycl::aspect::fp64:
345+
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
346+
"[SYCLcompat] 'double' is not supported in '" +
347+
dev.get_info<sycl::info::device::name>() +
348+
"' device");
349+
break;
350+
case sycl::aspect::fp16:
351+
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
352+
"[SYCLcompat] 'half' is not supported in '" +
353+
dev.get_info<sycl::info::device::name>() +
354+
"' device");
355+
break;
356+
default:
357+
#define __SYCL_ASPECT(ASPECT, ID) \
358+
case sycl::aspect::ASPECT: \
359+
return #ASPECT;
360+
#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
361+
#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
362+
auto getAspectNameStr = [](sycl::aspect AspectNum) -> std::string {
363+
switch (AspectNum) {
364+
#include <sycl/info/aspects.def>
365+
#include <sycl/info/aspects_deprecated.def>
366+
default:
367+
return "unknown aspect";
368+
}
369+
};
370+
#undef __SYCL_ASPECT_DEPRECATED_ALIAS
371+
#undef __SYCL_ASPECT_DEPRECATED
372+
#undef __SYCL_ASPECT
373+
throw sycl::exception(
374+
sycl::make_error_code(sycl::errc::runtime),
375+
"[SYCLcompat] '" + getAspectNameStr(it) + "' is not supported in '" +
376+
dev.get_info<sycl::info::device::name>() + "' device");
377+
}
378+
break;
379+
}
380+
}
381+
337382
/// device extension
338383
class device_ext : public sycl::device {
339384
public:
@@ -613,47 +658,7 @@ Use 64 bits as memory_bus_width default value."
613658
/// sycl::aspect.
614659
void has_capability_or_fail(
615660
const std::initializer_list<sycl::aspect> &props) const {
616-
for (const auto &it : props) {
617-
if (has(it))
618-
continue;
619-
switch (it) {
620-
case sycl::aspect::fp64:
621-
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
622-
"[SYCLcompat] 'double' is not supported in '" +
623-
get_info<sycl::info::device::name>() +
624-
"' device");
625-
break;
626-
case sycl::aspect::fp16:
627-
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
628-
"[SYCLcompat] 'half' is not supported in '" +
629-
get_info<sycl::info::device::name>() +
630-
"' device");
631-
break;
632-
default:
633-
#define __SYCL_ASPECT(ASPECT, ID) \
634-
case sycl::aspect::ASPECT: \
635-
return #ASPECT;
636-
#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
637-
#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
638-
auto getAspectNameStr = [](sycl::aspect AspectNum) -> std::string {
639-
switch (AspectNum) {
640-
#include <sycl/info/aspects.def>
641-
#include <sycl/info/aspects_deprecated.def>
642-
default:
643-
return "unknown aspect";
644-
}
645-
};
646-
#undef __SYCL_ASPECT_DEPRECATED_ALIAS
647-
#undef __SYCL_ASPECT_DEPRECATED
648-
#undef __SYCL_ASPECT
649-
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
650-
"[SYCLcompat] '" + getAspectNameStr(it) +
651-
"' is not supported in '" +
652-
get_info<sycl::info::device::name>() +
653-
"' device");
654-
}
655-
break;
656-
}
661+
::syclcompat::has_capability_or_fail(*this, props);
657662
}
658663
659664
private:

0 commit comments

Comments
 (0)