Skip to content

Commit 85cd17f

Browse files
committed
Addressed review comments
In addition to the "blocksareclusters" kernel attribute, this change also adds "ptx90" support to the NVPTX backend.
1 parent a8b53a4 commit 85cd17f

File tree

5 files changed

+33
-45
lines changed

5 files changed

+33
-45
lines changed

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,10 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
9797
def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 3)>;
9898
}
9999

100-
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
101-
70, 71, 72, 73, 74, 75, 76, 77, 78,
102-
80, 81, 82, 83, 84, 85, 86, 87, 88] in
103-
def PTX#version: FeaturePTX<version>;
100+
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72,
101+
73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 88,
102+
90] in
103+
def PTX#version : FeaturePTX<version>;
104104

105105
//===----------------------------------------------------------------------===//
106106
// NVPTX supported processors.

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -414,17 +414,6 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
414414
// the reqntid directive, and set the unspecified ones to 1.
415415
// If none of Reqntid* is specified, don't output reqntid directive.
416416
const auto ReqNTID = getReqNTID(F);
417-
418-
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
419-
const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
420-
421-
const bool BlocksAreClusters = F.hasFnAttribute("nvvm.blocksareclusters");
422-
if (BlocksAreClusters && STI->getSmVersion() >= 90) {
423-
if (ReqNTID.empty() || getClusterDim(F).empty())
424-
report_fatal_error("blocksareclusters requires reqntid and cluster_dim");
425-
O << ".blocksareclusters\n";
426-
}
427-
428417
if (!ReqNTID.empty())
429418
O << formatv(".reqntid {0:$[, ]}\n",
430419
make_range(ReqNTID.begin(), ReqNTID.end()));
@@ -442,8 +431,12 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
442431

443432
// .maxclusterrank directive requires SM_90 or higher, make sure that we
444433
// filter it out for lower SM versions, as it causes a hard ptxas crash.
434+
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
435+
const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
436+
445437
if (STI->getSmVersion() >= 90) {
446438
const auto ClusterDim = getClusterDim(F);
439+
const bool BlocksAreClusters = hasBlocksAreClusters(F);
447440

448441
if (!ClusterDim.empty()) {
449442

@@ -463,6 +456,13 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
463456
"should be 0 as well");
464457
}
465458
}
459+
460+
if (BlocksAreClusters && STI->getPTXVersion() >= 90) {
461+
assert(!(ReqNTID.empty() || getClusterDim(F).empty()) &&
462+
"blocksareclusters requires reqntid and cluster_dim");
463+
O << ".blocksareclusters\n";
464+
}
465+
466466
if (const auto Maxclusterrank = getMaxClusterRank(F))
467467
O << ".maxclusterrank " << *Maxclusterrank << "\n";
468468
}

llvm/lib/Target/NVPTX/NVPTXUtilities.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,10 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
352352
return getFnAttrParsedInt(F, "nvvm.maxnreg");
353353
}
354354

355+
bool hasBlocksAreClusters(const Function &F) {
356+
return F.hasFnAttribute("nvvm.blocksareclusters");
357+
}
358+
355359
MaybeAlign getAlign(const CallInst &I, unsigned Index) {
356360
// First check the alignstack metadata
357361
if (MaybeAlign StackAlign =

llvm/lib/Target/NVPTX/NVPTXUtilities.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ std::optional<unsigned> getMaxClusterRank(const Function &);
6060
std::optional<unsigned> getMinCTASm(const Function &);
6161
std::optional<unsigned> getMaxNReg(const Function &);
6262

63+
bool hasBlocksAreClusters(const Function &);
64+
6365
inline bool isKernelFunction(const Function &F) {
6466
return F.getCallingConv() == CallingConv::PTX_Kernel;
6567
}
Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 | FileCheck %s
2+
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx90 | FileCheck %s
33

44
target triple = "nvptx64-nvidia-cuda"
55

66
; Test "blocksareclusters" attribute with full "reqntid" and "cluster_dim"
77
; attributes.
8-
define ptx_kernel void @kernel1(i32* %input, i32* %output) #0 #1 #2 {
8+
define ptx_kernel void @kernel1(ptr %input, ptr %output) #0 #1 #2 {
99
; CHECK-LABEL: kernel1(
10-
; CHECK: .blocksareclusters
11-
; CHECK-NEXT: .reqntid 1024, 1, 1
10+
; CHECK: .reqntid 1024, 1, 1
1211
; CHECK-NEXT: .reqnctapercluster 2, 2, 2
12+
; CHECK-NEXT: .blocksareclusters
1313
; CHECK-NEXT: {
1414
; CHECK-EMPTY:
1515
; CHECK-EMPTY:
@@ -20,11 +20,11 @@ define ptx_kernel void @kernel1(i32* %input, i32* %output) #0 #1 #2 {
2020

2121
; Test "blocksareclusters" attribute with single dimension "reqntid" and
2222
; "cluster_dim" attributes.
23-
define ptx_kernel void @kernel2(i32* %input, i32* %output) #0 #3 #4 {
23+
define ptx_kernel void @kernel2(ptr %input, ptr %output) #0 #3 #4 {
2424
; CHECK-LABEL: kernel2(
25-
; CHECK: .blocksareclusters
26-
; CHECK-NEXT: .reqntid 1024
27-
; CHECK-NEXT: .reqnctapercluster 2 // @kernel2
25+
; CHECK: .reqntid 1024
26+
; CHECK-NEXT: .reqnctapercluster 2
27+
; CHECK-NEXT: .blocksareclusters // @kernel2
2828
; CHECK-NEXT: {
2929
; CHECK-EMPTY:
3030
; CHECK-EMPTY:
@@ -35,26 +35,11 @@ define ptx_kernel void @kernel2(i32* %input, i32* %output) #0 #3 #4 {
3535

3636
; Test "blocksareclusters" attribute with two dimensions(not z dimension)
3737
; "reqntid" and "cluster_dim" attributes.
38-
define ptx_kernel void @kernel3(i32* %input, i32* %output) #0 #5 #6 {
38+
define ptx_kernel void @kernel3(ptr %input, ptr %output) #0 #5 #6 {
3939
; CHECK-LABEL: kernel3(
40-
; CHECK: .blocksareclusters
41-
; CHECK-NEXT: .reqntid 512, 2
42-
; CHECK-NEXT: .reqnctapercluster 2, 2 // @kernel3
43-
; CHECK-NEXT: {
44-
; CHECK-EMPTY:
45-
; CHECK-EMPTY:
46-
; CHECK-NEXT: // %bb.0:
47-
; CHECK-NEXT: ret;
48-
ret void
49-
}
50-
51-
; Test "blocksareclusters" attribute with full "reqntid" and "cluster_dim"
52-
; attributes where kernel attribute is provided through metadata.
53-
define void @kernel4(i32* %input, i32* %output) #0 #1 #2 {
54-
; CHECK-LABEL: kernel4(
55-
; CHECK: .blocksareclusters
56-
; CHECK-NEXT: .reqntid 1024, 1, 1
57-
; CHECK-NEXT: .reqnctapercluster 2, 2, 2 // @kernel4
40+
; CHECK: .reqntid 512, 2
41+
; CHECK-NEXT: .reqnctapercluster 2, 2
42+
; CHECK-NEXT: .blocksareclusters // @kernel3
5843
; CHECK-NEXT: {
5944
; CHECK-EMPTY:
6045
; CHECK-EMPTY:
@@ -73,6 +58,3 @@ attributes #4 = { "nvvm.cluster_dim"="2" }
7358

7459
attributes #5 = { "nvvm.reqntid"="512,2" }
7560
attributes #6 = { "nvvm.cluster_dim"="2,2" }
76-
77-
!0 = !{void (i32*, i32*)* @kernel4, !"kernel", i32 1 }
78-
!nvvm.annotations = !{!0}

0 commit comments

Comments
 (0)