Skip to content

Commit fad3272

Browse files
authored
[NVPTX] Add support for "blocksareclusters" kernel attr (#152265)
This change introduces a new kernel attribute that allows thread blocks to be mapped to clusters. It also adds support for the `+ptx90` PTX ISA feature.
1 parent d0cde32 commit fad3272

File tree

6 files changed

+96
-5
lines changed

6 files changed

+96
-5
lines changed

llvm/docs/NVPTXUsage.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ Function Attributes
9292
dimension. Specifying a different cluster dimension at launch will result in
9393
a runtime error or kernel launch failure. Only supported for Hopper+.
9494

95+
``"nvvm.blocksareclusters"``
96+
This attribute indicates that the grid launch configuration for the corresponding
97+
kernel function specifies the number of clusters instead of the number of thread
98+
blocks. This attribute is only allowed for kernel functions and requires
99+
``nvvm.reqntid`` and ``nvvm.cluster_dim`` attributes, as well as PTX ISA version 9.0 or later.
100+
95101
.. _address_spaces:
96102

97103
Address Spaces

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,10 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
9797
def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 3)>;
9898
}
9999

100-
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
101-
70, 71, 72, 73, 74, 75, 76, 77, 78,
102-
80, 81, 82, 83, 84, 85, 86, 87, 88] in
103-
def PTX#version: FeaturePTX<version>;
100+
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72,
101+
73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 88,
102+
90] in
103+
def PTX#version : FeaturePTX<version>;
104104

105105
//===----------------------------------------------------------------------===//
106106
// NVPTX supported processors.

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,9 +436,13 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
436436

437437
if (STI->getSmVersion() >= 90) {
438438
const auto ClusterDim = getClusterDim(F);
439+
const bool BlocksAreClusters = hasBlocksAreClusters(F);
439440

440441
if (!ClusterDim.empty()) {
441-
O << ".explicitcluster\n";
442+
443+
if (!BlocksAreClusters)
444+
O << ".explicitcluster\n";
445+
442446
if (ClusterDim[0] != 0) {
443447
assert(llvm::all_of(ClusterDim, [](unsigned D) { return D != 0; }) &&
444448
"cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
@@ -452,6 +456,21 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
452456
"should be 0 as well");
453457
}
454458
}
459+
460+
if (BlocksAreClusters) {
461+
LLVMContext &Ctx = F.getContext();
462+
if (ReqNTID.empty() || ClusterDim.empty())
463+
Ctx.diagnose(DiagnosticInfoUnsupported(
464+
F, "blocksareclusters requires reqntid and cluster_dim attributes",
465+
F.getSubprogram()));
466+
else if (STI->getPTXVersion() < 90)
467+
Ctx.diagnose(DiagnosticInfoUnsupported(
468+
F, "blocksareclusters requires PTX version >= 9.0",
469+
F.getSubprogram()));
470+
else
471+
O << ".blocksareclusters\n";
472+
}
473+
455474
if (const auto Maxclusterrank = getMaxClusterRank(F))
456475
O << ".maxclusterrank " << *Maxclusterrank << "\n";
457476
}

llvm/lib/Target/NVPTX/NVPTXUtilities.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,10 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
352352
return getFnAttrParsedInt(F, "nvvm.maxnreg");
353353
}
354354

355+
bool hasBlocksAreClusters(const Function &F) {
356+
return F.hasFnAttribute("nvvm.blocksareclusters");
357+
}
358+
355359
MaybeAlign getAlign(const CallInst &I, unsigned Index) {
356360
// First check the alignstack metadata
357361
if (MaybeAlign StackAlign =

llvm/lib/Target/NVPTX/NVPTXUtilities.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ std::optional<unsigned> getMaxClusterRank(const Function &);
6060
std::optional<unsigned> getMinCTASm(const Function &);
6161
std::optional<unsigned> getMaxNReg(const Function &);
6262

63+
bool hasBlocksAreClusters(const Function &);
64+
6365
inline bool isKernelFunction(const Function &F) {
6466
return F.getCallingConv() == CallingConv::PTX_Kernel;
6567
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx90 | FileCheck %s
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
; Test "blocksareclusters" attribute with full "reqntid" and "cluster_dim"
7+
; attributes.
8+
define ptx_kernel void @kernel1(ptr %input, ptr %output) #0 #1 #2 {
9+
; CHECK-LABEL: kernel1(
10+
; CHECK: .reqntid 1024, 1, 1
11+
; CHECK-NEXT: .reqnctapercluster 2, 2, 2
12+
; CHECK-NEXT: .blocksareclusters
13+
; CHECK-NEXT: {
14+
; CHECK-EMPTY:
15+
; CHECK-EMPTY:
16+
; CHECK-NEXT: // %bb.0:
17+
; CHECK-NEXT: ret;
18+
ret void
19+
}
20+
21+
; Test "blocksareclusters" attribute with single dimension "reqntid" and
22+
; "cluster_dim" attributes.
23+
define ptx_kernel void @kernel2(ptr %input, ptr %output) #0 #3 #4 {
24+
; CHECK-LABEL: kernel2(
25+
; CHECK: .reqntid 1024
26+
; CHECK-NEXT: .reqnctapercluster 2
27+
; CHECK-NEXT: .blocksareclusters // @kernel2
28+
; CHECK-NEXT: {
29+
; CHECK-EMPTY:
30+
; CHECK-EMPTY:
31+
; CHECK-NEXT: // %bb.0:
32+
; CHECK-NEXT: ret;
33+
ret void
34+
}
35+
36+
; Test "blocksareclusters" attribute with two dimensions (no z dimension)
37+
; "reqntid" and "cluster_dim" attributes.
38+
define ptx_kernel void @kernel3(ptr %input, ptr %output) #0 #5 #6 {
39+
; CHECK-LABEL: kernel3(
40+
; CHECK: .reqntid 512, 2
41+
; CHECK-NEXT: .reqnctapercluster 2, 2
42+
; CHECK-NEXT: .blocksareclusters // @kernel3
43+
; CHECK-NEXT: {
44+
; CHECK-EMPTY:
45+
; CHECK-EMPTY:
46+
; CHECK-NEXT: // %bb.0:
47+
; CHECK-NEXT: ret;
48+
ret void
49+
}
50+
51+
attributes #0 = { "nvvm.blocksareclusters" }
52+
53+
attributes #1 = { "nvvm.reqntid"="1024,1,1" }
54+
attributes #2 = { "nvvm.cluster_dim"="2,2,2" }
55+
56+
attributes #3 = { "nvvm.reqntid"="1024" }
57+
attributes #4 = { "nvvm.cluster_dim"="2" }
58+
59+
attributes #5 = { "nvvm.reqntid"="512,2" }
60+
attributes #6 = { "nvvm.cluster_dim"="2,2" }

0 commit comments

Comments
 (0)