Skip to content

Commit e1bdf8a

Browse files
authored
[CIR][CUDA] Add target-specific attributes (#1457)
This implements the missing feature `cir::setTargetAttributes`. Although other targets might also need attributes, this PR focuses on the CUDA-specific ones. CUDA kernels (on the device side, not their host stubs) must use the `ptx_kernel` calling convention; it is added here. CUDA kernels, as well as global variables, also involve lots of NVVM metadata, which is intended to be dealt with in the same place. That work is marked with a new missing feature here.
1 parent 6e914ba commit e1bdf8a

File tree

7 files changed

+67
-26
lines changed

7 files changed

+67
-26
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3560,11 +3560,12 @@ def CC_C : I32EnumAttrCase<"C", 1, "c">;
35603560
def CC_SpirKernel : I32EnumAttrCase<"SpirKernel", 2, "spir_kernel">;
35613561
def CC_SpirFunction : I32EnumAttrCase<"SpirFunction", 3, "spir_function">;
35623562
def CC_OpenCLKernel : I32EnumAttrCase<"OpenCLKernel", 4, "opencl_kernel">;
3563+
def CC_PTXKernel : I32EnumAttrCase<"PTXKernel", 5, "ptx_kernel">;
35633564

35643565
def CallingConv : I32EnumAttr<
35653566
"CallingConv",
35663567
"calling convention",
3567-
[CC_C, CC_SpirKernel, CC_SpirFunction, CC_OpenCLKernel]> {
3568+
[CC_C, CC_SpirKernel, CC_SpirFunction, CC_OpenCLKernel, CC_PTXKernel]> {
35683569
let cppNamespace = "::cir";
35693570
}
35703571

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -236,6 +236,7 @@ struct MissingFeatures {
236236
static bool exceptions() { return false; }
237237
static bool metaDataNode() { return false; }
238238
static bool emitDeclMetadata() { return false; }
239+
static bool emitNVVMMetadata() { return false; }
239240
static bool emitScalarRangeCheck() { return false; }
240241
static bool stmtExprEvaluation() { return false; }
241242
static bool setCallingConv() { return false; }
@@ -470,9 +471,6 @@ struct MissingFeatures {
470471
// can optimize away the store and load ops. Seems like an early optimization.
471472
static bool returnValueDominatingStoreOptmiization() { return false; }
472473

473-
// Globals (vars and functions) may have attributes that are target depedent.
474-
static bool setTargetAttributes() { return false; }
475-
476474
// CIR modules parsed from text form may not carry the triple or data layout
477475
// specs. We should make it always present.
478476
static bool makeTripleAlwaysPresent() { return false; }

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -903,7 +903,7 @@ void CIRGenModule::setNonAliasAttributes(GlobalDecl gd, mlir::Operation *go) {
903903
if (f)
904904
assert(!cir::MissingFeatures::setSectionForFuncOp());
905905
}
906-
assert(!cir::MissingFeatures::setTargetAttributes());
906+
getTargetCIRGenInfo().setTargetAttributes(d, go, *this);
907907
}
908908

909909
static llvm::SmallVector<int64_t> indexesOfArrayAttr(mlir::ArrayAttr indexes) {
@@ -1211,10 +1211,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty,
12111211
// something closer to GlobalValue::isDeclaration instead of checking for
12121212
// initializer.
12131213
if (gv.isDeclaration()) {
1214-
// TODO(cir): set target attributes
1214+
getTargetCIRGenInfo().setTargetAttributes(d, gv, *this);
12151215

1216-
// External HIP managed variables needed to be recorded for transformation
1217-
// in both device and host compilations.
12181216
// External HIP managed variables needed to be recorded for transformation
12191217
// in both device and host compilations.
12201218
if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
@@ -2920,6 +2918,10 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
29202918
// TODO(cir): Complete the remaining part of the function.
29212919
assert(!cir::MissingFeatures::setFunctionAttributes());
29222920

2921+
if (!isIncompleteFunction && func.isDeclaration())
2922+
getTargetCIRGenInfo().setTargetAttributes(globalDecl.getDecl(), func,
2923+
*this);
2924+
29232925
// TODO(cir): This needs a lot of work to better match CodeGen. That
29242926
// ultimately ends up in setGlobalVisibility, which already has the linkage of
29252927
// the LLVM GV (corresponding to our FuncOp) computed, so it doesn't have to

clang/lib/CIR/CodeGen/TargetInfo.cpp

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -345,11 +345,39 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo {
345345
public:
346346
NVPTXTargetCIRGenInfo(CIRGenTypes &cgt)
347347
: TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {}
348+
348349
mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const override {
349350
// On the device side, texture reference is represented as an object handle
350351
// in 64-bit integer.
351352
return cir::IntType::get(&getABIInfo().CGT.getMLIRContext(), 64, true);
352353
}
354+
355+
void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global,
356+
CIRGenModule &cgm) const override {
357+
if (const auto *vd = clang::dyn_cast_or_null<clang::VarDecl>(decl)) {
358+
assert(!cir::MissingFeatures::emitNVVMMetadata());
359+
return;
360+
}
361+
362+
if (const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl)) {
363+
cir::FuncOp func = mlir::cast<cir::FuncOp>(global);
364+
if (func.isDeclaration())
365+
return;
366+
367+
if (cgm.getLangOpts().CUDA) {
368+
if (fd->hasAttr<CUDAGlobalAttr>()) {
369+
func.setCallingConv(cir::CallingConv::PTXKernel);
370+
371+
// In LLVM we should create metadata like:
372+
// !{<func-ref>, metadata !"kernel", i32 1}
373+
assert(!cir::MissingFeatures::emitNVVMMetadata());
374+
}
375+
}
376+
377+
if (fd->getAttr<CUDALaunchBoundsAttr>())
378+
llvm_unreachable("NYI");
379+
}
380+
}
353381
};
354382

355383
} // namespace

clang/lib/CIR/CodeGen/TargetInfo.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,15 @@ class TargetCIRGenInfo {
4141
/// Returns ABI info helper for the target.
4242
const ABIInfo &getABIInfo() const { return *Info; }
4343

44+
/// Provides a convenient hook to handle extra target-specific attributes
45+
/// for the given global.
46+
/// In OG, the function receives an llvm::GlobalValue. However, functions
47+
/// and global variables are separate types in Clang IR, so we use a general
48+
/// mlir::Operation*.
49+
virtual void setTargetAttributes(const clang::Decl *decl,
50+
mlir::Operation *global,
51+
CIRGenModule &module) const {}
52+
4453
virtual bool isScalarizableAsmOperand(CIRGenFunction &CGF,
4554
mlir::Type Ty) const {
4655
return false;

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -763,11 +763,11 @@ mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage) {
763763
};
764764
}
765765

766-
mlir::LLVM::CConv convertCallingConv(cir::CallingConv callinvConv) {
766+
mlir::LLVM::CConv convertCallingConv(cir::CallingConv callingConv) {
767767
using CIR = cir::CallingConv;
768768
using LLVM = mlir::LLVM::CConv;
769769

770-
switch (callinvConv) {
770+
switch (callingConv) {
771771
case CIR::C:
772772
return LLVM::C;
773773
case CIR::SpirKernel:
@@ -776,6 +776,8 @@ mlir::LLVM::CConv convertCallingConv(cir::CallingConv callinvConv) {
776776
return LLVM::SPIR_FUNC;
777777
case CIR::OpenCLKernel:
778778
llvm_unreachable("NYI");
779+
case CIR::PTXKernel:
780+
return LLVM::PTX_Kernel;
779781
}
780782
llvm_unreachable("Unknown calling convention");
781783
}

clang/test/CIR/CodeGen/CUDA/simple.cu

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@ __device__ void device_fn(int* a, double b, float c) {}
2222
// CIR-DEVICE: cir.func @_Z9device_fnPidf
2323

2424
__global__ void global_fn(int a) {}
25-
// CIR-DEVICE: @_Z9global_fni
25+
// CIR-DEVICE: @_Z9global_fni({{.*}} cc(ptx_kernel)
26+
// LLVM-DEVICE: define dso_local ptx_kernel void @_Z9global_fni
2627

2728
// Check for device stub emission.
2829

@@ -32,9 +33,9 @@ __global__ void global_fn(int a) {}
3233
// CIR-HOST: cir.get_global @_Z24__device_stub__global_fni
3334
// CIR-HOST: cir.call @cudaLaunchKernel
3435

35-
// COM: LLVM-HOST: void @_Z24__device_stub__global_fni
36-
// COM: LLVM-HOST: call i32 @__cudaPopCallConfiguration
37-
// COM: LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni
36+
// LLVM-HOST: void @_Z24__device_stub__global_fni
37+
// LLVM-HOST: call i32 @__cudaPopCallConfiguration
38+
// LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni
3839

3940
int main() {
4041
global_fn<<<1, 1>>>(1);
@@ -51,15 +52,15 @@ int main() {
5152
// CIR-HOST: cir.call @_Z24__device_stub__global_fni([[Arg]])
5253
// CIR-HOST: }
5354

54-
// COM: LLVM-HOST: define dso_local i32 @main
55-
// COM: LLVM-HOST: alloca %struct.dim3
56-
// COM: LLVM-HOST: alloca %struct.dim3
57-
// COM: LLVM-HOST: call void @_ZN4dim3C1Ejjj
58-
// COM: LLVM-HOST: call void @_ZN4dim3C1Ejjj
59-
// COM: LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration
60-
// COM: LLVM-HOST: br [[LLVMConfigOK]], label %[[Good:[0-9]+]], label [[Bad:[0-9]+]]
61-
// COM: LLVM-HOST: [[Good]]:
62-
// COM: LLVM-HOST: call void @_Z24__device_stub__global_fni
63-
// COM: LLVM-HOST: br label [[Bad]]
64-
// COM: LLVM-HOST: [[Bad]]:
65-
// COM: LLVM-HOST: ret i32
55+
// LLVM-HOST: define dso_local i32 @main
56+
// LLVM-HOST: alloca %struct.dim3
57+
// LLVM-HOST: alloca %struct.dim3
58+
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
59+
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
60+
// LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration
61+
// LLVM-HOST: br [[LLVMConfigOK]], label %[[Good:[0-9]+]], label [[Bad:[0-9]+]]
62+
// LLVM-HOST: [[Good]]:
63+
// LLVM-HOST: call void @_Z24__device_stub__global_fni
64+
// LLVM-HOST: br label [[Bad]]
65+
// LLVM-HOST: [[Bad]]:
66+
// LLVM-HOST: ret i32

0 commit comments

Comments (0)