Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 6 additions & 36 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);

static void
addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
const SmallVectorImpl<int> &GridConstantArgs);

private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) {
Expand Down Expand Up @@ -266,27 +262,24 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// By default, all functions are device functions
if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
// OpenCL/CUDA kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
if (FD->hasAttr<CUDAGlobalAttr>()) {
SmallVector<int, 10> GCI;
F->setCallingConv(llvm::CallingConv::PTX_Kernel);

for (auto IV : llvm::enumerate(FD->parameters()))
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
// For some reason arg indices are 1-based in NVVM
GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
addGridConstantNVVMMetadata(F, GCI);
F->addParamAttr(
IV.index(),
llvm::Attribute::get(F->getContext(), "nvvm.grid_constant"));
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
}
}
// Attach kernel metadata directly if compiling for NVPTX.
if (FD->hasAttr<DeviceKernelAttr>()) {
if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
}
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
Expand All @@ -306,29 +299,6 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {

llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();

// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
if (!GridConstantArgs.empty()) {
SmallVector<llvm::Metadata *, 10> GCM;
for (int I : GridConstantArgs)
GCM.push_back(llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
llvm::MDNode::get(Ctx, GCM)});
}

// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
Expand Down
16 changes: 6 additions & 10 deletions clang/test/CodeGenCUDA/grid-constant.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,9 @@ void foo() {
tkernel_const<S><<<1,1>>>({});
tkernel<const S><<<1,1>>>(1, {});
}
//.
//.
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
// CHECK: [[META1]] = !{i32 1, i32 3}
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
// CHECK: [[META3]] = !{i32 1}
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
// CHECK: [[META6]] = !{i32 2}
//.

// CHECK: define dso_local ptx_kernel void @_Z6kernel1Sii(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %gc_arg1, i32 noundef %arg2, i32 noundef "nvvm.grid_constant" %gc_arg3)
// CHECK: define ptx_kernel void @_Z13tkernel_constIK1SEvT_(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
// CHECK: define ptx_kernel void @_Z13tkernel_constI1SEvT_(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
// CHECK: define ptx_kernel void @_Z7tkernelIK1SEviT_(i32 noundef %dummy, ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)

59 changes: 19 additions & 40 deletions llvm/docs/NVPTXUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,19 @@ not.

When compiled, the PTX kernel functions are callable by host-side code.


Parameter Attributes
--------------------

``"nvvm.grid_constant"``
This attribute may be attached to a ``byval`` parameter of a kernel function
to indicate that the parameter should be lowered as a direct reference to
the grid-constant memory of the parameter, as opposed to a copy of the
parameter in local memory. Writing to a grid-constant parameter is
undefined behavior. Unlike a normal ``byval`` parameter, the address of a
grid-constant parameter is not unique to a given function invocation but
instead is shared by all kernels in the grid.

.. _nvptx_fnattrs:

Function Attributes
Expand Down Expand Up @@ -2289,9 +2302,9 @@ The Kernel
; Intrinsic to read X component of thread ID
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind

define void @kernel(ptr addrspace(1) %A,
ptr addrspace(1) %B,
ptr addrspace(1) %C) {
define ptx_kernel void @kernel(ptr addrspace(1) %A,
ptr addrspace(1) %B,
ptr addrspace(1) %C) {
entry:
; What is my ID?
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
Expand All @@ -2314,9 +2327,6 @@ The Kernel
ret void
}

!nvvm.annotations = !{!0}
!0 = !{ptr @kernel, !"kernel", i32 1}


We can use the LLVM ``llc`` tool to directly run the NVPTX code generator:

Expand Down Expand Up @@ -2442,34 +2452,6 @@ and non-generic address spaces.
See :ref:`address_spaces` and :ref:`nvptx_intrinsics` for more information.


Kernel Metadata
^^^^^^^^^^^^^^^

In PTX, a function can be either a `kernel` function (callable from the host
program), or a `device` function (callable only from GPU code). You can think
of `kernel` functions as entry-points in the GPU program. To mark an LLVM IR
function as a `kernel` function, we make use of special LLVM metadata. The
NVPTX back-end will look for a named metadata node called
``nvvm.annotations``. This named metadata must contain a list of metadata that
describe the IR. For our purposes, we need to declare a metadata node that
assigns the "kernel" attribute to the LLVM IR function that should be emitted
as a PTX `kernel` function. These metadata nodes take the form:

.. code-block:: text

!{<function ref>, metadata !"kernel", i32 1}

For the previous example, we have:

.. code-block:: llvm

!nvvm.annotations = !{!0}
!0 = !{ptr @kernel, !"kernel", i32 1}

Here, we have a single metadata declaration in ``nvvm.annotations``. This
metadata annotates our ``@kernel`` function with the ``kernel`` attribute.


Running the Kernel
------------------

Expand Down Expand Up @@ -2669,9 +2651,9 @@ Libdevice provides an ``__nv_powf`` function that we will use.
; libdevice function
declare float @__nv_powf(float, float)

define void @kernel(ptr addrspace(1) %A,
ptr addrspace(1) %B,
ptr addrspace(1) %C) {
define ptx_kernel void @kernel(ptr addrspace(1) %A,
ptr addrspace(1) %B,
ptr addrspace(1) %C) {
entry:
; What is my ID?
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
Expand All @@ -2694,9 +2676,6 @@ Libdevice provides an ``__nv_powf`` function that we will use.
ret void
}

!nvvm.annotations = !{!0}
!0 = !{ptr @kernel, !"kernel", i32 1}


To compile this kernel, we perform the following steps:

Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5381,6 +5381,16 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
return true;
}
if (K == "grid_constant") {
const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
for (const auto &Op : cast<MDNode>(V)->operands()) {
// For some reason, the index is 1-based in the metadata. Good thing we're
// able to auto-upgrade it!
const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
cast<Function>(GV)->addParamAttr(Index, Attr);
}
return true;
}

return false;
}
Expand Down
32 changes: 4 additions & 28 deletions llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,6 @@ void clearAnnotationCache(const Module *Mod) {
AC.Cache.erase(Mod);
}

static void readIntVecFromMDNode(const MDNode *MetadataNode,
std::vector<unsigned> &Vec) {
for (unsigned i = 0, e = MetadataNode->getNumOperands(); i != e; ++i) {
ConstantInt *Val =
mdconst::extract<ConstantInt>(MetadataNode->getOperand(i));
Vec.push_back(Val->getZExtValue());
}
}

static void cacheAnnotationFromMD(const MDNode *MetadataNode,
key_val_pair_t &retval) {
auto &AC = getAnnotationCache();
Expand All @@ -83,19 +74,8 @@ static void cacheAnnotationFromMD(const MDNode *MetadataNode,
if (ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(
MetadataNode->getOperand(i + 1))) {
retval[Key].push_back(Val->getZExtValue());
} else if (MDNode *VecMd =
dyn_cast<MDNode>(MetadataNode->getOperand(i + 1))) {
// note: only "grid_constant" annotations support vector MDNodes.
// assert: there can only exist one unique key value pair of
// the form (string key, MDNode node). Operands of such a node
// shall always be unsigned ints.
auto [It, Inserted] = retval.try_emplace(Key);
if (Inserted) {
readIntVecFromMDNode(VecMd, It->second);
continue;
}
} else {
llvm_unreachable("Value operand not a constant int or an mdnode");
llvm_unreachable("Value operand not a constant int");
}
}
}
Expand Down Expand Up @@ -179,16 +159,13 @@ static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) {
}

static bool argHasNVVMAnnotation(const Value &Val,
const std::string &Annotation,
const bool StartArgIndexAtOne = false) {
const std::string &Annotation) {
if (const Argument *Arg = dyn_cast<Argument>(&Val)) {
const Function *Func = Arg->getParent();
std::vector<unsigned> Annot;
if (findAllNVVMAnnotation(Func, Annotation, Annot)) {
const unsigned BaseOffset = StartArgIndexAtOne ? 1 : 0;
if (is_contained(Annot, BaseOffset + Arg->getArgNo())) {
if (is_contained(Annot, Arg->getArgNo()))
return true;
}
}
}
return false;
Expand Down Expand Up @@ -250,8 +227,7 @@ bool isParamGridConstant(const Argument &Arg) {
}

// "grid_constant" counts argument indices starting from 1
if (argHasNVVMAnnotation(Arg, "grid_constant",
/*StartArgIndexAtOne*/ true))
if (Arg.hasAttribute("nvvm.grid_constant"))
return true;

return false;
Expand Down
Loading
Loading