Skip to content

Commit 6d29c30

Browse files
committed
[SYCL] Fix bug when using no device split and reqd_work_group_size
1 parent ac207a1 commit 6d29c30

File tree

5 files changed

+32
-14
lines changed

5 files changed

+32
-14
lines changed

llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ using EntryPointSet = SetVector<Function *>;
3434

3535
PropSetRegTy computeModuleProperties(const Module &M,
3636
const EntryPointSet &EntryPoints,
37-
const GlobalBinImageProps &GlobProps);
37+
const GlobalBinImageProps &GlobProps,
38+
module_split::IRSplitMode SplitMode);
3839

3940
std::string computeModuleSymbolTable(const Module &M,
4041
const EntryPointSet &EntryPoints);

llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ std::optional<T> getKernelSingleEltMetadata(const Function &Func,
152152

153153
PropSetRegTy computeModuleProperties(const Module &M,
154154
const EntryPointSet &EntryPoints,
155-
const GlobalBinImageProps &GlobProps) {
155+
const GlobalBinImageProps &GlobProps,
156+
module_split::IRSplitMode SplitMode) {
156157

157158
PropSetRegTy PropSet;
158159
{
@@ -161,8 +162,16 @@ PropSetRegTy computeModuleProperties(const Module &M,
161162
PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry);
162163
}
163164
{
165+
// Usually, we would only expect one ReqdWGSize, as the module passed to
166+
// this function would be split according to that. However, when splitting
167+
// is disabled, this cannot be guaranteed. In this case, we reset the value,
168+
// which makes so that no value is reqd_work_group_size data is attached in
169+
// in the device image.
170+
SYCLDeviceRequirements DeviceReqs = computeDeviceRequirements(M, EntryPoints);
171+
if (SplitMode == module_split::SPLIT_NONE)
172+
DeviceReqs.ReqdWorkGroupSize.reset();
164173
PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS,
165-
computeDeviceRequirements(M, EntryPoints).asMap());
174+
DeviceReqs.asMap());
166175
}
167176

168177
// extract spec constant maps per each module

llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ SYCLDeviceRequirements
4040
llvm::computeDeviceRequirements(const Module &M,
4141
const SetVector<Function *> &EntryPoints) {
4242
SYCLDeviceRequirements Reqs;
43-
bool MultipleReqdWGSize = false;
4443
// Process all functions in the module
4544
for (const Function &F : M) {
4645
if (auto *MDN = F.getMetadata("sycl_used_aspects")) {
@@ -81,8 +80,6 @@ llvm::computeDeviceRequirements(const Module &M,
8180
ExtractUnsignedIntegerFromMDNodeOperand(MDN, I));
8281
if (!Reqs.ReqdWorkGroupSize.has_value())
8382
Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize;
84-
if (Reqs.ReqdWorkGroupSize != NewReqdWorkGroupSize)
85-
MultipleReqdWGSize = true;
8683
}
8784

8885
if (auto *MDN = F.getMetadata("sycl_joint_matrix")) {
@@ -119,13 +116,6 @@ llvm::computeDeviceRequirements(const Module &M,
119116
}
120117
}
121118

122-
// Usually, we would only expect one ReqdWGSize, as the module passed to
123-
// this function would be split according to that. However, when splitting
124-
// is disabled, this cannot be guaranteed. In this case, we reset the value,
125-
// which makes so that no value is reqd_work_group_size data is attached in
126-
// in the device image.
127-
if (MultipleReqdWGSize)
128-
Reqs.ReqdWorkGroupSize.reset();
129119
return Reqs;
130120
}
131121

llvm/tools/sycl-post-link/sycl-post-link.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD,
308308
const GlobalBinImageProps &GlobProps, int I,
309309
StringRef Suff, StringRef Target = "") {
310310
auto PropSet =
311-
computeModuleProperties(MD.getModule(), MD.entries(), GlobProps);
311+
computeModuleProperties(MD.getModule(), MD.entries(), GlobProps, SplitMode);
312312

313313
std::string NewSuff = Suff.str();
314314
if (!Target.empty()) {
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// This test checks that with -fsycl-device-code-split=off, kernels
2+
// with different reqd_work_group_size dimensions can be launched.
3+
4+
// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
// UNSUPPORTED: hip
8+
9+
#include <sycl/detail/core.hpp>
10+
11+
using namespace sycl;
12+
13+
int main(int argc, char **argv) {
14+
queue q;
15+
q.single_task([]{});
16+
q.parallel_for(range<2>(24, 1), [=](auto) [[sycl::reqd_work_group_size(24,1)]] {});
17+
return 0;
18+
}

0 commit comments

Comments
 (0)