diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 2dc8f9ff6a57f..629bf2ea5afb4 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -92,6 +92,12 @@ Function Attributes dimension. Specifying a different cluster dimension at launch will result in a runtime error or kernel launch failure. Only supported for Hopper+. +``"nvvm.blocksareclusters"`` + This attribute indicates that the grid launch configuration for the corresponding + kernel function specifies the number of clusters instead of the number of thread + blocks. It is only allowed on kernel functions, requires the ``nvvm.reqntid`` and + ``nvvm.cluster_dim`` attributes, and requires PTX ISA version 9.0 or later. + .. _address_spaces: Address Spaces diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 83992606bc419..8a445f82e7001 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -97,10 +97,10 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 3)>; } -foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, - 70, 71, 72, 73, 74, 75, 76, 77, 78, - 80, 81, 82, 83, 84, 85, 86, 87, 88] in - def PTX#version: FeaturePTX; +foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 88, + 90] in + def PTX#version : FeaturePTX; //===----------------------------------------------------------------------===// // NVPTX supported processors. 
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 38912a7f09e30..f1b4398431a34 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -436,9 +436,13 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, if (STI->getSmVersion() >= 90) { const auto ClusterDim = getClusterDim(F); + const bool BlocksAreClusters = hasBlocksAreClusters(F); if (!ClusterDim.empty()) { - O << ".explicitcluster\n"; + + if (!BlocksAreClusters) + O << ".explicitcluster\n"; + if (ClusterDim[0] != 0) { assert(llvm::all_of(ClusterDim, [](unsigned D) { return D != 0; }) && "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z " @@ -452,6 +456,19 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, "should be 0 as well"); } } + + if (BlocksAreClusters) { + LLVMContext &Ctx = F.getContext(); + if (ReqNTID.empty() || ClusterDim.empty()) { + Ctx.emitError( + "blocksareclusters requires reqntid and cluster_dim attributes"); + } else if (STI->getPTXVersion() < 90) { + Ctx.emitError("blocksareclusters requires PTX version >= 9.0"); + } else { + O << ".blocksareclusters\n"; + } + } + if (const auto Maxclusterrank = getMaxClusterRank(F)) O << ".maxclusterrank " << *Maxclusterrank << "\n"; } diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 6586f925504f1..274b04fdd30b5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -352,6 +352,10 @@ std::optional getMaxNReg(const Function &F) { return getFnAttrParsedInt(F, "nvvm.maxnreg"); } +bool hasBlocksAreClusters(const Function &F) { + return F.hasFnAttribute("nvvm.blocksareclusters"); +} + MaybeAlign getAlign(const CallInst &I, unsigned Index) { // First check the alignstack metadata if (MaybeAlign StackAlign = diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index 
4eb452f398220..9421f9f54d0a6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -60,6 +60,8 @@ std::optional getMaxClusterRank(const Function &); std::optional getMinCTASm(const Function &); std::optional getMaxNReg(const Function &); +bool hasBlocksAreClusters(const Function &); + inline bool isKernelFunction(const Function &F) { return F.getCallingConv() == CallingConv::PTX_Kernel; } diff --git a/llvm/test/CodeGen/NVPTX/blocksareclusters-kernel-attr.ll b/llvm/test/CodeGen/NVPTX/blocksareclusters-kernel-attr.ll new file mode 100644 index 0000000000000..a0a99fe55654f --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/blocksareclusters-kernel-attr.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx90 | FileCheck %s + +target triple = "nvptx64-nvidia-cuda" + +; Test "blocksareclusters" attribute with full "reqntid" and "cluster_dim" +; attributes. +define ptx_kernel void @kernel1(ptr %input, ptr %output) #0 #1 #2 { +; CHECK-LABEL: kernel1( +; CHECK: .reqntid 1024, 1, 1 +; CHECK-NEXT: .reqnctapercluster 2, 2, 2 +; CHECK-NEXT: .blocksareclusters +; CHECK-NEXT: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ret; + ret void +} + +; Test "blocksareclusters" attribute with single dimension "reqntid" and +; "cluster_dim" attributes. +define ptx_kernel void @kernel2(ptr %input, ptr %output) #0 #3 #4 { +; CHECK-LABEL: kernel2( +; CHECK: .reqntid 1024 +; CHECK-NEXT: .reqnctapercluster 2 +; CHECK-NEXT: .blocksareclusters // @kernel2 +; CHECK-NEXT: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ret; + ret void +} + +; Test "blocksareclusters" attribute with two dimensions (no z dimension) +; "reqntid" and "cluster_dim" attributes. 
+define ptx_kernel void @kernel3(ptr %input, ptr %output) #0 #5 #6 { +; CHECK-LABEL: kernel3( +; CHECK: .reqntid 512, 2 +; CHECK-NEXT: .reqnctapercluster 2, 2 +; CHECK-NEXT: .blocksareclusters // @kernel3 +; CHECK-NEXT: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ret; + ret void +} + +attributes #0 = { "nvvm.blocksareclusters" } + +attributes #1 = { "nvvm.reqntid"="1024,1,1" } +attributes #2 = { "nvvm.cluster_dim"="2,2,2" } + +attributes #3 = { "nvvm.reqntid"="1024" } +attributes #4 = { "nvvm.cluster_dim"="2" } + +attributes #5 = { "nvvm.reqntid"="512,2" } +attributes #6 = { "nvvm.cluster_dim"="2,2" }