Skip to content

Commit fe18590

Browse files
authored
[SYCL][NVPTX] Emit reqd_work_group_size attributes as NVVM annotations (#14502)
Only emit the values the user actually provided as annotations in the LLVM IR; the NVPTX backend pads any missing values with 1s. This matches the rule that the attribute must provide exactly as many values as the work-group has dimensions, so the work-group size of each unused dimension can be assumed to be 1.
1 parent d3cdb95 commit fe18590

File tree

2 files changed

+82
-2
lines changed

2 files changed

+82
-2
lines changed

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,37 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
303303
addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal());
304304
}
305305

306+
if (const auto *RWGS = FD->getAttr<SYCLReqdWorkGroupSizeAttr>()) {
307+
llvm::SmallVector<std::optional<int64_t>, 3> Ops;
308+
// Index-flip and pad out any missing elements. Note the misleading
309+
// nomenclature of the methods: getXDimVal doesn't return the X dimension;
310+
// it returns the left-most dimension (dim0). This could correspond to
311+
// CUDA's X, Y, or Z, depending on the number of operands provided.
312+
if (auto Dim0 = RWGS->getXDimVal())
313+
Ops.push_back(Dim0->getExtValue());
314+
if (auto Dim1 = RWGS->getYDimVal())
315+
Ops.push_back(Dim1->getExtValue());
316+
if (auto Dim2 = RWGS->getZDimVal())
317+
Ops.push_back(Dim2->getExtValue());
318+
std::reverse(Ops.begin(), Ops.end());
319+
Ops.append(3 - Ops.size(), std::nullopt);
320+
321+
// Work-group sizes (in NVVM annotations) must be positive and less than
322+
// INT32_MAX, whereas SYCL can allow for larger work-group sizes (see
323+
// -fno-sycl-id-queries-fit-in-int). If any dimension is too large for
324+
// NVPTX, don't emit any annotation at all.
325+
if (llvm::all_of(Ops, [](std::optional<int64_t> V) {
326+
return !V || llvm::isUInt<31>(*V);
327+
})) {
328+
if (auto X = Ops[0])
329+
addNVVMMetadata(F, "reqntidx", *X);
330+
if (auto Y = Ops[1])
331+
addNVVMMetadata(F, "reqntidy", *Y);
332+
if (auto Z = Ops[2])
333+
addNVVMMetadata(F, "reqntidz", *Z);
334+
}
335+
}
336+
306337
auto attrValue = [&](Expr *E) {
307338
const auto *CE = cast<ConstantExpr>(E);
308339
std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();

clang/test/CodeGenSYCL/reqd-work-group-size.cpp

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
22
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
3-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
4-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX
4+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX
55

66
#include "sycl.hpp"
77

@@ -123,6 +123,55 @@ int main() {
123123
// CHECK: define {{.*}} void @{{.*}}kernel_name22() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]]
124124
// CHECK: define {{.*}} void @{{.*}}kernel_name24() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]]
125125

126+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidx", i32 16}
127+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidy", i32 16}
128+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidz", i32 32}
129+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidx", i32 8}
130+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidy", i32 8}
131+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidz", i32 8}
132+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidx", i32 2}
133+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidy", i32 2}
134+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidz", i32 2}
135+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidx", i32 2}
136+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidy", i32 8}
137+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidz", i32 1}
138+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidx", i32 16}
139+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidy", i32 16}
140+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidz", i32 32}
141+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidx", i32 8}
142+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidy", i32 8}
143+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidz", i32 8}
144+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidx", i32 2}
145+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidy", i32 2}
146+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidz", i32 2}
147+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidx", i32 2}
148+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidy", i32 8}
149+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidz", i32 1}
150+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidx", i32 16}
151+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidy", i32 32}
152+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name13, !"reqntidz"
153+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidx", i32 8}
154+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidy", i32 8}
155+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name15, !"reqntidz"
156+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name16, !"reqntidx", i32 2}
157+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name16, !"reqntidy", i32 2}
158+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name16, !"reqntidz"
159+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidx", i32 8}
160+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidy", i32 1}
161+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name18, !"reqntidz"
162+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name19, !"reqntidx", i32 32}
163+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidy",
164+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidz",
165+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name21, !"reqntidx", i32 8}
166+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidy",
167+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidz",
168+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name22, !"reqntidx", i32 2}
169+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidy",
170+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidz",
171+
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name24, !"reqntidx", i32 1}
172+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidy",
173+
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidz",
174+
126175
// CHECK: ![[NDRWGS3D]] = !{i32 3}
127176
// CHECK: ![[WGSIZE3D32]] = !{i32 16, i32 16, i32 32}
128177
// CHECK: ![[WGSIZE3D88]] = !{i32 8, i32 8, i32 8}

0 commit comments

Comments
 (0)