Skip to content

Commit 45bb32f

Browse files
committed
[SYCL] Fixed-size groups and partitions are renamed to "chunks"
1 parent c833d8a commit 45bb32f

File tree

6 files changed

+80
-77
lines changed

6 files changed

+80
-77
lines changed

sycl/include/sycl/detail/spirv.hpp

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace oneapi {
2626
struct sub_group;
2727
namespace experimental {
2828
template <typename ParentGroup> class ballot_group;
29-
template <size_t PartitionSize, typename ParentGroup> class fixed_size_group;
29+
template <size_t ChunkSize, typename ParentGroup> class chunk;
3030
template <int Dimensions> class root_group;
3131
template <typename ParentGroup> class tangle_group;
3232
class opportunistic_group;
@@ -78,9 +78,9 @@ struct is_ballot_group<
7878

7979
template <typename Group> struct is_fixed_size_group : std::false_type {};
8080

81-
template <size_t PartitionSize, typename ParentGroup>
82-
struct is_fixed_size_group<sycl::ext::oneapi::experimental::fixed_size_group<
83-
PartitionSize, ParentGroup>> : std::true_type {};
81+
template <size_t ChunkSize, typename ParentGroup>
82+
struct is_chunk<sycl::ext::oneapi::experimental::chunk<
83+
ChunkSize, ParentGroup>> : std::true_type {};
8484

8585
template <typename Group> struct group_scope {};
8686

@@ -105,9 +105,9 @@ struct group_scope<sycl::ext::oneapi::experimental::ballot_group<ParentGroup>> {
105105
static constexpr __spv::Scope::Flag value = group_scope<ParentGroup>::value;
106106
};
107107

108-
template <size_t PartitionSize, typename ParentGroup>
109-
struct group_scope<sycl::ext::oneapi::experimental::fixed_size_group<
110-
PartitionSize, ParentGroup>> {
108+
template <size_t ChunkSize, typename ParentGroup>
109+
struct group_scope<sycl::ext::oneapi::experimental::chunk<
110+
ChunkSize, ParentGroup>> {
111111
static constexpr __spv::Scope::Flag value = group_scope<ParentGroup>::value;
112112
};
113113

@@ -174,15 +174,15 @@ bool GroupAll(ext::oneapi::experimental::ballot_group<ParentGroup> g,
174174
return __spirv_GroupNonUniformAll(group_scope<ParentGroup>::value, pred);
175175
}
176176
}
177-
template <size_t PartitionSize, typename ParentGroup>
177+
template <size_t ChunkSize, typename ParentGroup>
178178
bool GroupAll(
179-
ext::oneapi::experimental::fixed_size_group<PartitionSize, ParentGroup>,
179+
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup>,
180180
bool pred) {
181181
// GroupNonUniformAll doesn't support cluster size, so use a reduction
182182
return __spirv_GroupNonUniformBitwiseAnd(
183183
group_scope<ParentGroup>::value,
184184
static_cast<uint32_t>(__spv::GroupOperation::ClusteredReduce),
185-
static_cast<uint32_t>(pred), PartitionSize);
185+
static_cast<uint32_t>(pred), ChunkSize);
186186
}
187187
template <typename ParentGroup>
188188
bool GroupAll(ext::oneapi::experimental::tangle_group<ParentGroup>, bool pred) {
@@ -210,15 +210,15 @@ bool GroupAny(ext::oneapi::experimental::ballot_group<ParentGroup> g,
210210
return __spirv_GroupNonUniformAny(group_scope<ParentGroup>::value, pred);
211211
}
212212
}
213-
template <size_t PartitionSize, typename ParentGroup>
213+
template <size_t ChunkSize, typename ParentGroup>
214214
bool GroupAny(
215-
ext::oneapi::experimental::fixed_size_group<PartitionSize, ParentGroup>,
215+
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup>,
216216
bool pred) {
217217
// GroupNonUniformAny doesn't support cluster size, so use a reduction
218218
return __spirv_GroupNonUniformBitwiseOr(
219219
group_scope<ParentGroup>::value,
220220
static_cast<uint32_t>(__spv::GroupOperation::ClusteredReduce),
221-
static_cast<uint32_t>(pred), PartitionSize);
221+
static_cast<uint32_t>(pred), ChunkSize);
222222
}
223223
template <typename ParentGroup>
224224
bool GroupAny(ext::oneapi::experimental::tangle_group<ParentGroup>, bool pred) {
@@ -327,12 +327,12 @@ GroupBroadcast(sycl::ext::oneapi::experimental::ballot_group<ParentGroup> g,
327327
WideOCLX, OCLId);
328328
}
329329
}
330-
template <size_t PartitionSize, typename ParentGroup, typename T, typename IdT>
330+
template <size_t ChunkSize, typename ParentGroup, typename T, typename IdT>
331331
EnableIfNativeBroadcast<T, IdT> GroupBroadcast(
332-
ext::oneapi::experimental::fixed_size_group<PartitionSize, ParentGroup> g,
332+
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup> g,
333333
T x, IdT local_id) {
334334
// Remap local_id to its original numbering in ParentGroup
335-
auto LocalId = g.get_group_linear_id() * PartitionSize + local_id;
335+
auto LocalId = g.get_group_linear_id() * ChunkSize + local_id;
336336

337337
// TODO: Refactor to avoid duplication after design settles.
338338
auto GroupLocalId = static_cast<typename GroupId<ParentGroup>::type>(LocalId);
@@ -341,9 +341,9 @@ EnableIfNativeBroadcast<T, IdT> GroupBroadcast(
341341
auto OCLId = detail::convertToOpenCLType(GroupLocalId);
342342

343343
// NonUniformBroadcast requires Id to be dynamically uniform, which does not
344-
// hold here; each partition is broadcasting a separate index. We could
344+
// hold here; each chunk is broadcasting a separate index. We could
345345
// fallback to either NonUniformShuffle or a NonUniformBroadcast per
346-
// partition, and it's unclear which will be faster in practice.
346+
// chunk, and it's unclear which will be faster in practice.
347347
return __spirv_GroupNonUniformShuffle(group_scope<ParentGroup>::value,
348348
WideOCLX, OCLId);
349349
}
@@ -1298,12 +1298,10 @@ ControlBarrier(Group g, memory_scope FenceScope, memory_order Order) {
12981298
} \
12991299
} \
13001300
\
1301-
template <__spv::GroupOperation Op, size_t PartitionSize, \
1301+
template <__spv::GroupOperation Op, size_t ChunkSize, \
13021302
typename ParentGroup, typename T> \
13031303
inline T Group##Instruction( \
1304-
ext::oneapi::experimental::fixed_size_group<PartitionSize, ParentGroup> \
1305-
g, \
1306-
T x) { \
1304+
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup> g, T x) \
13071305
using ConvertedT = detail::ConvertToOpenCLType_t<T>; \
13081306
\
13091307
using OCLT = std::conditional_t< \
@@ -1321,7 +1319,7 @@ ControlBarrier(Group g, memory_scope FenceScope, memory_order Order) {
13211319
constexpr auto OpInt = \
13221320
static_cast<unsigned int>(__spv::GroupOperation::ClusteredReduce); \
13231321
return __spirv_GroupNonUniform##Instruction(Scope, OpInt, Arg, \
1324-
PartitionSize); \
1322+
ChunkSize); \
13251323
} else { \
13261324
T tmp; \
13271325
for (size_t Cluster = 0; Cluster < g.get_group_linear_range(); \

sycl/include/sycl/ext/oneapi/experimental/fixed_size_group.hpp

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,18 @@ namespace sycl {
2626
inline namespace _V1 {
2727
namespace ext::oneapi::experimental {
2828

29-
template <size_t PartitionSize, typename ParentGroup> class fixed_size_group;
29+
template <size_t ChunkSize, typename ParentGroup> class chunk;
3030

31-
template <size_t PartitionSize, typename Group>
31+
template <size_t ChunkSize, typename Group>
3232
#ifdef __SYCL_DEVICE_ONLY__
3333
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_fixed_size_group)]]
3434
#endif
3535
inline std::enable_if_t<sycl::is_group_v<std::decay_t<Group>> &&
3636
std::is_same_v<Group, sycl::sub_group>,
37-
fixed_size_group<PartitionSize, Group>>
38-
get_fixed_size_group(Group group);
37+
chunk<ChunkSize, Group>>
38+
chunked_partition(Group group);
3939

40-
template <size_t PartitionSize, typename ParentGroup> class fixed_size_group {
40+
template <size_t ChunkSize, typename ParentGroup> class chunk {
4141
public:
4242
using id_type = id<1>;
4343
using range_type = range<1>;
@@ -47,7 +47,7 @@ template <size_t PartitionSize, typename ParentGroup> class fixed_size_group {
4747

4848
id_type get_group_id() const {
4949
#ifdef __SYCL_DEVICE_ONLY__
50-
return __spirv_SubgroupLocalInvocationId() / PartitionSize;
50+
return __spirv_SubgroupLocalInvocationId() / ChunkSize;
5151
#else
5252
throw exception(make_error_code(errc::runtime),
5353
"Non-uniform groups are not supported on host.");
@@ -56,7 +56,7 @@ template <size_t PartitionSize, typename ParentGroup> class fixed_size_group {
5656

5757
id_type get_local_id() const {
5858
#ifdef __SYCL_DEVICE_ONLY__
59-
return __spirv_SubgroupLocalInvocationId() % PartitionSize;
59+
return __spirv_SubgroupLocalInvocationId() % ChunkSize;
6060
#else
6161
throw exception(make_error_code(errc::runtime),
6262
"Non-uniform groups are not supported on host.");
@@ -65,7 +65,7 @@ template <size_t PartitionSize, typename ParentGroup> class fixed_size_group {
6565

6666
range_type get_group_range() const {
6767
#ifdef __SYCL_DEVICE_ONLY__
68-
return __spirv_SubgroupSize() / PartitionSize;
68+
return __spirv_SubgroupSize() / ChunkSize;
6969
#else
7070
throw exception(make_error_code(errc::runtime),
7171
"Non-uniform groups are not supported on host.");
@@ -74,7 +74,7 @@ template <size_t PartitionSize, typename ParentGroup> class fixed_size_group {
7474

7575
range_type get_local_range() const {
7676
#ifdef __SYCL_DEVICE_ONLY__
77-
return PartitionSize;
77+
return ChunkSize;
7878
#else
7979
throw exception(make_error_code(errc::runtime),
8080
"Non-uniform groups are not supported on host.");
@@ -137,57 +137,57 @@ template <size_t PartitionSize, typename ParentGroup> class fixed_size_group {
137137
fixed_size_group() {}
138138
#endif
139139

140-
friend fixed_size_group<PartitionSize, ParentGroup>
141-
get_fixed_size_group<PartitionSize, ParentGroup>(ParentGroup g);
140+
friend chunk<ChunkSize, ParentGroup>
141+
chunked_partition<ChunkSize, ParentGroup>(ParentGroup g);
142142

143143
friend sub_group_mask
144-
sycl::detail::GetMask<fixed_size_group<PartitionSize, ParentGroup>>(
145-
fixed_size_group<PartitionSize, ParentGroup> Group);
144+
sycl::detail::GetMask<chunk<ChunkSize, ParentGroup>>(
145+
chunk<ChunkSize, ParentGroup> Group);
146146
};
147147

148-
template <size_t PartitionSize, typename Group>
148+
template <size_t ChunkSize, typename Group>
149149
inline std::enable_if_t<sycl::is_group_v<std::decay_t<Group>> &&
150150
std::is_same_v<Group, sycl::sub_group>,
151-
fixed_size_group<PartitionSize, Group>>
152-
get_fixed_size_group(Group group) {
151+
chunk<ChunkSize, Group>>
152+
chunked_partition(Group group) {
153153
(void)group;
154154
#ifdef __SYCL_DEVICE_ONLY__
155155
#if defined(__NVPTX__)
156156
uint32_t loc_id = group.get_local_linear_id();
157157
uint32_t loc_size = group.get_local_linear_range();
158-
uint32_t bits = PartitionSize == 32
158+
uint32_t bits = ChunkSize == 32
159159
? 0xffffffff
160-
: ((1 << PartitionSize) - 1)
161-
<< ((loc_id / PartitionSize) * PartitionSize);
160+
: ((1 << ChunkSize) - 1)
161+
<< ((loc_id / ChunkSize) * ChunkSize);
162162

163-
return fixed_size_group<PartitionSize, sycl::sub_group>(
163+
return chunk<ChunkSize, sycl::sub_group>(
164164
sycl::detail::Builder::createSubGroupMask<ext::oneapi::sub_group_mask>(
165165
bits, loc_size));
166166
#else
167-
return fixed_size_group<PartitionSize, sycl::sub_group>();
167+
return chunk<ChunkSize, sycl::sub_group>();
168168
#endif
169169
#else
170170
throw exception(make_error_code(errc::runtime),
171171
"Non-uniform groups are not supported on host.");
172172
#endif
173173
}
174174

175-
template <size_t PartitionSize, typename ParentGroup>
176-
struct is_user_constructed_group<fixed_size_group<PartitionSize, ParentGroup>>
175+
template <size_t ChunkSize, typename ParentGroup>
176+
struct is_user_constructed_group<chunk<ChunkSize, ParentGroup>>
177177
: std::true_type {};
178178

179179
} // namespace ext::oneapi::experimental
180180

181181
namespace detail {
182-
template <size_t PartitionSize, typename ParentGroup>
183-
struct is_fixed_size_group<
184-
ext::oneapi::experimental::fixed_size_group<PartitionSize, ParentGroup>>
182+
template <size_t ChunkSize, typename ParentGroup>
183+
struct is_chunk<
184+
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup>>
185185
: std::true_type {};
186186
} // namespace detail
187187

188-
template <size_t PartitionSize, typename ParentGroup>
188+
template <size_t ChunkSize, typename ParentGroup>
189189
struct is_group<
190-
ext::oneapi::experimental::fixed_size_group<PartitionSize, ParentGroup>>
190+
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup>>
191191
: std::true_type {};
192192

193193
} // namespace _V1

sycl/include/sycl/ext/oneapi/experimental/non_uniform_groups.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ namespace ext::oneapi::experimental {
7373

7474
// Forward declarations of non-uniform group types for algorithm definitions
7575
template <typename ParentGroup> class ballot_group;
76-
template <size_t PartitionSize, typename ParentGroup> class fixed_size_group;
76+
template <size_t ChunkSize, typename ParentGroup> class chunk;
7777
template <typename ParentGroup> class tangle_group;
7878
class opportunistic_group;
7979

sycl/test-e2e/NonUniformGroups/fixed_size_group.cpp renamed to sycl/test-e2e/NonUniformGroups/chunk.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,19 @@
1313
#include <vector>
1414
namespace syclex = sycl::ext::oneapi::experimental;
1515

16-
template <size_t PartitionSize> class TestKernel;
16+
template <size_t ChunkSize> class TestKernel;
1717

18-
template <size_t PartitionSize> void test() {
18+
template <size_t ChunkSize> void test() {
1919
sycl::queue Q;
2020

2121
// Test for both the full sub-group size and a case with less work than a full
2222
// sub-group.
2323
for (size_t WGS : std::array<size_t, 2>{32, 16}) {
24-
if (WGS < PartitionSize)
24+
if (WGS < ChunkSize)
2525
continue;
2626

2727
std::cout << "Testing for work size " << WGS << " and partition size "
28-
<< PartitionSize << std::endl;
28+
<< ChunkSize << std::endl;
2929

3030
sycl::buffer<bool, 1> MatchBuf{sycl::range{WGS}};
3131
sycl::buffer<bool, 1> LeaderBuf{sycl::range{WGS}};
@@ -40,24 +40,24 @@ template <size_t PartitionSize> void test() {
4040
auto SG = item.get_sub_group();
4141
auto SGS = SG.get_local_linear_range();
4242

43-
auto Partition = syclex::get_fixed_size_group<PartitionSize>(SG);
43+
auto Partition = syclex::chunked_partition<ChunkSize>(SG);
4444

4545
bool Match = true;
46-
Match &= (Partition.get_group_id() == (WI / PartitionSize));
47-
Match &= (Partition.get_local_id() == (WI % PartitionSize));
48-
Match &= (Partition.get_group_range() == (SGS / PartitionSize));
49-
Match &= (Partition.get_local_range() == PartitionSize);
46+
Match &= (Partition.get_group_id() == (WI / ChunkSize));
47+
Match &= (Partition.get_local_id() == (WI % ChunkSize));
48+
Match &= (Partition.get_group_range() == (SGS / ChunkSize));
49+
Match &= (Partition.get_local_range() == ChunkSize);
5050
MatchAcc[WI] = Match;
5151
LeaderAcc[WI] = Partition.leader();
5252
};
53-
CGH.parallel_for<TestKernel<PartitionSize>>(NDR, KernelFunc);
53+
CGH.parallel_for<TestKernel<ChunkSize>>(NDR, KernelFunc);
5454
});
5555

5656
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
5757
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
5858
for (int WI = 0; WI < WGS; ++WI) {
5959
assert(MatchAcc[WI] == true);
60-
assert(LeaderAcc[WI] == ((WI % PartitionSize) == 0));
60+
assert(LeaderAcc[WI] == ((WI % ChunkSize) == 0));
6161
}
6262
}
6363
}

0 commit comments

Comments
 (0)