Skip to content

Commit fb1d33d

Browse files
committed
SYCL] Fixed-size groups and partitions are renamed to "chunks"
1 parent 4abd474 commit fb1d33d

File tree

11 files changed

+45
-38
lines changed

11 files changed

+45
-38
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def AspectExt_oneapi_bindless_images_sample_1d_usm : Aspect<"ext_oneapi_bindless
7171
def AspectExt_oneapi_bindless_images_sample_2d_usm : Aspect<"ext_oneapi_bindless_images_sample_2d_usm">;
7272
def AspectExt_intel_esimd : Aspect<"ext_intel_esimd">;
7373
def AspectExt_oneapi_ballot_group : Aspect<"ext_oneapi_ballot_group">;
74-
def AspectExt_oneapi_fixed_size_group : Aspect<"ext_oneapi_fixed_size_group">;
74+
def AspectExt_oneapi_chunk : Aspect<"ext_oneapi_chunk">;
7575
def AspectExt_oneapi_opportunistic_group : Aspect<"ext_oneapi_opportunistic_group">;
7676
def AspectExt_oneapi_tangle_group : Aspect<"ext_oneapi_tangle_group">;
7777
def AspectExt_intel_matrix : Aspect<"ext_intel_matrix">;
@@ -144,7 +144,7 @@ def : TargetInfo<"__TestAspectList",
144144
AspectExt_oneapi_bindless_sampled_image_fetch_2d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_2d,
145145
AspectExt_oneapi_bindless_sampled_image_fetch_3d,
146146
AspectExt_intel_esimd,
147-
AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
147+
AspectExt_oneapi_ballot_group, AspectExt_oneapi_chunk, AspectExt_oneapi_opportunistic_group,
148148
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component,
149149
AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_private_alloca,
150150
AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem, AspectExt_oneapi_cuda_cluster_group,
@@ -163,7 +163,7 @@ defvar IntelCpuAspects = [
163163
AspectCpu, AspectFp16, AspectFp64, AspectQueue_profiling, AspectAtomic64,
164164
AspectExt_oneapi_srgb, AspectExt_oneapi_native_assert,
165165
AspectExt_intel_legacy_image, AspectExt_oneapi_ballot_group,
166-
AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
166+
AspectExt_oneapi_chunk, AspectExt_oneapi_opportunistic_group,
167167
AspectExt_oneapi_tangle_group, AspectExt_oneapi_private_alloca
168168
] # AllUSMAspects;
169169

@@ -231,7 +231,7 @@ class CudaTargetInfo<string targetName, list<Aspect> aspectList, int subGroupSiz
231231
defvar CudaMinAspects = !listconcat(AllUSMAspects, [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker,
232232
AspectQueue_profiling, AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth, AspectExt_intel_memory_bus_width,
233233
AspectExt_intel_device_info_uuid, AspectExt_oneapi_native_assert, AspectExt_intel_free_memory, AspectExt_intel_device_id,
234-
AspectExt_intel_memory_clock_rate, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group,
234+
AspectExt_intel_memory_clock_rate, AspectExt_oneapi_ballot_group, AspectExt_oneapi_chunk,
235235
AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]);
236236
// Bindless images aspects are partially supported on CUDA and disabled by default at the moment.
237237
defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images, AspectExt_oneapi_bindless_images_shared_usm,

sycl/include/sycl/detail/spirv.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ struct is_ballot_group<
7676
sycl::ext::oneapi::experimental::ballot_group<ParentGroup>>
7777
: std::true_type {};
7878

79-
template <typename Group> struct is_fixed_size_group : std::false_type {};
79+
template <typename Group> struct is_chunk : std::false_type {};
8080

8181
template <size_t ChunkSize, typename ParentGroup>
8282
struct is_chunk<sycl::ext::oneapi::experimental::chunk<
@@ -888,7 +888,7 @@ inline uint32_t MapShuffleID(GroupT g, id<1> local_id) {
888888
if constexpr (is_tangle_or_opportunistic_group<GroupT>::value ||
889889
is_ballot_group<GroupT>::value)
890890
return detail::IdToMaskPosition(g, local_id);
891-
else if constexpr (is_fixed_size_group<GroupT>::value)
891+
else if constexpr (is_chunk<GroupT>::value)
892892
return g.get_group_linear_id() * g.get_local_range().size() + local_id;
893893
else
894894
return local_id.get(0);
@@ -983,7 +983,7 @@ EnableIfNativeShuffle<T> ShuffleXor(GroupT g, T x, id<1> mask) {
983983
if constexpr (ext::oneapi::experimental::is_user_constructed_group_v<
984984
GroupT>) {
985985
auto MemberMask = detail::ExtractMask(detail::GetMask(g))[0];
986-
if constexpr (is_fixed_size_group_v<GroupT>) {
986+
if constexpr (is_chunk_v<GroupT>) {
987987
return cuda_shfl_sync_bfly_i32(MemberMask, x,
988988
static_cast<uint32_t>(mask.get(0)), 0x1f);
989989

@@ -1031,7 +1031,7 @@ EnableIfNativeShuffle<T> ShuffleDown(GroupT g, T x, uint32_t delta) {
10311031
if constexpr (ext::oneapi::experimental::is_user_constructed_group_v<
10321032
GroupT>) {
10331033
auto MemberMask = detail::ExtractMask(detail::GetMask(g))[0];
1034-
if constexpr (is_fixed_size_group_v<GroupT>) {
1034+
if constexpr (is_chunk_v<GroupT>) {
10351035
return cuda_shfl_sync_down_i32(MemberMask, x, delta, 31);
10361036
} else {
10371037
unsigned localSetBit = g.get_local_id()[0] + 1;
@@ -1075,7 +1075,7 @@ EnableIfNativeShuffle<T> ShuffleUp(GroupT g, T x, uint32_t delta) {
10751075
if constexpr (ext::oneapi::experimental::is_user_constructed_group_v<
10761076
GroupT>) {
10771077
auto MemberMask = detail::ExtractMask(detail::GetMask(g))[0];
1078-
if constexpr (is_fixed_size_group_v<GroupT>) {
1078+
if constexpr (is_chunk_v<GroupT>) {
10791079
return cuda_shfl_sync_up_i32(MemberMask, x, delta, 0);
10801080
} else {
10811081
unsigned localSetBit = g.get_local_id()[0] + 1;
@@ -1301,7 +1301,7 @@ ControlBarrier(Group g, memory_scope FenceScope, memory_order Order) {
13011301
template <__spv::GroupOperation Op, size_t ChunkSize, \
13021302
typename ParentGroup, typename T> \
13031303
inline T Group##Instruction( \
1304-
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup> g, T x) \
1304+
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup> g, T x) { \
13051305
using ConvertedT = detail::ConvertToOpenCLType_t<T>; \
13061306
\
13071307
using OCLT = std::conditional_t< \

sycl/include/sycl/detail/type_traits.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
namespace sycl {
2121
inline namespace _V1 {
2222
namespace detail {
23-
template <class T> struct is_fixed_size_group : std::false_type {};
23+
template <class T> struct is_chunk : std::false_type {};
2424

2525
template <class T>
26-
inline constexpr bool is_fixed_size_group_v = is_fixed_size_group<T>::value;
26+
inline constexpr bool is_chunk_v = is_chunk<T>::value;
2727

2828
template <typename VecT, typename OperationLeftT, typename OperationRightT,
2929
template <typename> class OperationCurrentT, int... Indexes>

sycl/include/sycl/ext/oneapi/experimental/fixed_size_group.hpp renamed to sycl/include/sycl/ext/oneapi/experimental/chunk.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//==--- fixed_size_group.hpp --- SYCL extension for non-uniform groups -----==//
1+
//==--- chunk.hpp --- SYCL extension for non-uniform groups -----==//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,7 +10,7 @@
1010

1111
#include <sycl/aspects.hpp>
1212
#include <sycl/detail/spirv.hpp>
13-
#include <sycl/detail/type_traits.hpp> // for is_fixed_size_group, is_group
13+
#include <sycl/detail/type_traits.hpp> // for is_chunk, is_group
1414
#include <sycl/exception.hpp>
1515
#include <sycl/ext/oneapi/experimental/non_uniform_groups.hpp>
1616
#include <sycl/ext/oneapi/sub_group_mask.hpp> // for sub_group_mask
@@ -30,7 +30,7 @@ template <size_t ChunkSize, typename ParentGroup> class chunk;
3030

3131
template <size_t ChunkSize, typename Group>
3232
#ifdef __SYCL_DEVICE_ONLY__
33-
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_fixed_size_group)]]
33+
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_chunk)]]
3434
#endif
3535
inline std::enable_if_t<sycl::is_group_v<std::decay_t<Group>> &&
3636
std::is_same_v<Group, sycl::sub_group>,
@@ -44,7 +44,9 @@ template <size_t ChunkSize, typename ParentGroup> class chunk {
4444
using linear_id_type = typename ParentGroup::linear_id_type;
4545
static constexpr int dimensions = 1;
4646
static constexpr sycl::memory_scope fence_scope = ParentGroup::fence_scope;
47-
47+
/* ToDo:wd
48+
we don't have fragment (operator fragment<ParentGroup>() const;) implementation yet.
49+
*/
4850
id_type get_group_id() const {
4951
#ifdef __SYCL_DEVICE_ONLY__
5052
return __spirv_SubgroupLocalInvocationId() / ChunkSize;
@@ -132,9 +134,9 @@ template <size_t ChunkSize, typename ParentGroup> class chunk {
132134
#endif
133135

134136
#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
135-
fixed_size_group(ext::oneapi::sub_group_mask mask) : Mask(mask) {}
137+
chunk(ext::oneapi::sub_group_mask mask) : Mask(mask) {}
136138
#else
137-
fixed_size_group() {}
139+
chunk() {}
138140
#endif
139141

140142
friend chunk<ChunkSize, ParentGroup>

sycl/include/sycl/ext/oneapi/experimental/cuda/non_uniform_algorithms.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,9 @@ masked_reduction_cuda_sm80(Group g, T x, BinaryOperation binary_op,
9595

9696
//// Shuffle based masked reduction impls
9797

98-
// fixed_size_group group reduction using shfls
98+
// chunk group reduction using shfls
9999
template <typename Group, typename T, class BinaryOperation>
100-
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_fixed_size_group_v<Group>, T>
100+
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_chunk_v<Group>, T>
101101
masked_reduction_cuda_shfls(Group g, T x, BinaryOperation binary_op,
102102
const uint32_t MemberMask) {
103103
for (int i = g.get_local_range()[0] / 2; i > 0; i /= 2) {
@@ -111,7 +111,7 @@ masked_reduction_cuda_shfls(Group g, T x, BinaryOperation binary_op,
111111
template <typename Group, typename T, class BinaryOperation>
112112
inline __SYCL_ALWAYS_INLINE std::enable_if_t<
113113
ext::oneapi::experimental::is_user_constructed_group_v<Group> &&
114-
!is_fixed_size_group_v<Group>,
114+
!is_chunk_v<Group>,
115115
T>
116116
masked_reduction_cuda_shfls(Group g, T x, BinaryOperation binary_op,
117117
const uint32_t MemberMask) {
@@ -208,10 +208,10 @@ inline __SYCL_ALWAYS_INLINE
208208

209209
//// Shuffle based masked reduction impls
210210

211-
// fixed_size_group group scan using shfls
211+
// chunk group scan using shfls
212212
template <__spv::GroupOperation Op, typename Group, typename T,
213213
class BinaryOperation>
214-
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_fixed_size_group_v<Group>, T>
214+
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_chunk_v<Group>, T>
215215
masked_scan_cuda_shfls(Group g, T x, BinaryOperation binary_op,
216216
const uint32_t MemberMask) {
217217
unsigned localIdVal = g.get_local_id()[0];
@@ -233,7 +233,7 @@ template <__spv::GroupOperation Op, typename Group, typename T,
233233
class BinaryOperation>
234234
inline __SYCL_ALWAYS_INLINE std::enable_if_t<
235235
ext::oneapi::experimental::is_user_constructed_group_v<Group> &&
236-
!is_fixed_size_group_v<Group>,
236+
!is_chunk_v<Group>,
237237
T>
238238
masked_scan_cuda_shfls(Group g, T x, BinaryOperation binary_op,
239239
const uint32_t MemberMask) {

sycl/include/sycl/info/aspects.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ __SYCL_ASPECT(ext_oneapi_mipmap_anisotropy, 51)
4646
__SYCL_ASPECT(ext_oneapi_mipmap_level_reference, 52)
4747
__SYCL_ASPECT(ext_intel_esimd, 53)
4848
__SYCL_ASPECT(ext_oneapi_ballot_group, 54)
49-
__SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
49+
__SYCL_ASPECT(ext_oneapi_chunk, 55)
5050
__SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
5151
__SYCL_ASPECT(ext_oneapi_tangle_group, 57)
5252
__SYCL_ASPECT(ext_intel_matrix, 58)

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@
9393
#include <sycl/ext/oneapi/experimental/composite_device.hpp>
9494
#include <sycl/ext/oneapi/experimental/cuda/barrier.hpp>
9595
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
96-
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
96+
#include <sycl/ext/oneapi/experimental/chunk.hpp>
9797
#include <sycl/ext/oneapi/experimental/forward_progress.hpp>
9898
#include <sycl/ext/oneapi/experimental/group_load_store.hpp>
9999
#include <sycl/ext/oneapi/experimental/group_sort.hpp>

sycl/source/detail/device_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,7 @@ bool device_impl::has(aspect Aspect) const {
658658
return call_successful && support;
659659
}
660660
case aspect::ext_oneapi_ballot_group:
661-
case aspect::ext_oneapi_fixed_size_group:
661+
case aspect::ext_oneapi_chunk:
662662
case aspect::ext_oneapi_opportunistic_group: {
663663
return (this->getBackend() == backend::ext_oneapi_level_zero) ||
664664
(this->getBackend() == backend::opencl) ||

sycl/test-e2e/NonUniformGroups/chunk.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,16 @@
77
// REQUIRES: cpu || gpu
88
// UNSUPPORTED: hip
99
// REQUIRES: sg-32
10+
// REQUIRES: aspect-ext_oneapi_chunk
1011

11-
#include <sycl/detail/core.hpp>
12-
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
1312
#include <vector>
13+
14+
//#ifdef __SYCL_DEVICE_ONLY__
15+
//[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_chunk)]]
16+
17+
#include <sycl/detail/core.hpp>
18+
#include <sycl/ext/oneapi/experimental/chunk.hpp>
19+
1420
namespace syclex = sycl::ext::oneapi::experimental;
1521

1622
template <size_t ChunkSize> class TestKernel;
@@ -71,3 +77,5 @@ int main() {
7177
test<32>();
7278
return 0;
7379
}
80+
81+
//# endif

sycl/test-e2e/NonUniformGroups/chunk_algorithms.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,16 @@
66
//
77
// REQUIRES: cpu || gpu
88
// REQUIRES: sg-32
9-
// REQUIRES: aspect-ext_oneapi_fixed_size_group
9+
// REQUIRES: aspect-ext_oneapi_chunk
1010

1111
#include <sycl/detail/core.hpp>
12-
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
12+
#include <sycl/ext/oneapi/experimental/chunk.hpp>
1313
#include <sycl/group_algorithm.hpp>
1414
#include <sycl/group_barrier.hpp>
1515
#include <vector>
16-
namespace syclex = sycl::ext::oneapi::experimental;
17-
18-
19-
2016

2117

18+
namespace syclex = sycl::ext::oneapi::experimental;
2219

2320
template <size_t ChunkSize> class TestKernel;
2421

0 commit comments

Comments
 (0)