Skip to content

Commit fffcd5a

Browse files
authored
[CIR][CUDA] Decorate global CUDA shadow variables with appropriate CIR attribute. (#1467)
Started decorating CUDA shadow variables with the shadow_name CIR attribute which will be used for registering the globals.
1 parent 4d480b5 commit fffcd5a

File tree

5 files changed

+118
-1
lines changed

5 files changed

+118
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
//===----------------------------------------------------------------------===//
1414

1515
#include "CIRGenCUDARuntime.h"
16+
#include "CIRGenCXXABI.h"
1617
#include "CIRGenFunction.h"
1718
#include "mlir/IR/Operation.h"
1819
#include "clang/Basic/Cuda.h"
@@ -23,9 +24,26 @@
2324
using namespace clang;
2425
using namespace clang::CIRGen;
2526

27+
static std::unique_ptr<MangleContext> initDeviceMC(CIRGenModule &cgm) {
28+
// If the host and device have different C++ ABIs, mark it as the device
29+
// mangle context so that the mangling needs to retrieve the additional
30+
// device lambda mangling number instead of the regular host one.
31+
if (cgm.getASTContext().getAuxTargetInfo() &&
32+
cgm.getASTContext().getTargetInfo().getCXXABI().isMicrosoft() &&
33+
cgm.getASTContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily()) {
34+
return std::unique_ptr<MangleContext>(
35+
cgm.getASTContext().createDeviceMangleContext(
36+
*cgm.getASTContext().getAuxTargetInfo()));
37+
}
38+
39+
return std::unique_ptr<MangleContext>(cgm.getASTContext().createMangleContext(
40+
cgm.getASTContext().getAuxTargetInfo()));
41+
}
42+
2643
CIRGenCUDARuntime::~CIRGenCUDARuntime() {}
2744

28-
CIRGenCUDARuntime::CIRGenCUDARuntime(CIRGenModule &cgm) : cgm(cgm) {
45+
CIRGenCUDARuntime::CIRGenCUDARuntime(CIRGenModule &cgm)
46+
: cgm(cgm), deviceMC(initDeviceMC(cgm)) {
2947
if (cgm.getLangOpts().OffloadViaLLVM)
3048
llvm_unreachable("NYI");
3149
else if (cgm.getLangOpts().HIP)
@@ -289,6 +307,39 @@ mlir::Operation *CIRGenCUDARuntime::getKernelHandle(cir::FuncOp fn,
289307
return globalOp;
290308
}
291309

310+
std::string CIRGenCUDARuntime::getDeviceSideName(const NamedDecl *nd) {
311+
GlobalDecl gd;
312+
// nd could be either a kernel or a variable.
313+
if (auto *fd = dyn_cast<FunctionDecl>(nd))
314+
gd = GlobalDecl(fd, KernelReferenceKind::Kernel);
315+
else
316+
gd = GlobalDecl(nd);
317+
std::string deviceSideName;
318+
MangleContext *mc;
319+
if (cgm.getLangOpts().CUDAIsDevice)
320+
mc = &cgm.getCXXABI().getMangleContext();
321+
else
322+
mc = deviceMC.get();
323+
if (mc->shouldMangleDeclName(nd)) {
324+
SmallString<256> buffer;
325+
llvm::raw_svector_ostream out(buffer);
326+
mc->mangleName(gd, out);
327+
deviceSideName = std::string(out.str());
328+
} else
329+
deviceSideName = std::string(nd->getIdentifier()->getName());
330+
331+
// Make unique name for device side static file-scope variable for HIP.
332+
if (cgm.getASTContext().shouldExternalize(nd) &&
333+
cgm.getLangOpts().GPURelocatableDeviceCode) {
334+
SmallString<256> buffer;
335+
llvm::raw_svector_ostream out(buffer);
336+
out << deviceSideName;
337+
cgm.printPostfixForExternalizedDecl(out, nd);
338+
deviceSideName = std::string(out.str());
339+
}
340+
return deviceSideName;
341+
}
342+
292343
void CIRGenCUDARuntime::internalizeDeviceSideVar(
293344
const VarDecl *d, cir::GlobalLinkageKind &linkage) {
294345
if (cgm.getLangOpts().GPURelocatableDeviceCode)

clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ class CIRGenCUDARuntime {
4747
std::string addPrefixToName(StringRef FuncName) const;
4848
std::string addUnderscoredPrefixToName(StringRef FuncName) const;
4949

50+
// Mangle context for device.
51+
std::unique_ptr<MangleContext> deviceMC;
52+
5053
public:
5154
CIRGenCUDARuntime(CIRGenModule &cgm);
5255
virtual ~CIRGenCUDARuntime();
@@ -60,6 +63,9 @@ class CIRGenCUDARuntime {
6063
virtual mlir::Operation *getKernelHandle(cir::FuncOp fn, GlobalDecl GD);
6164
virtual void internalizeDeviceSideVar(const VarDecl *d,
6265
cir::GlobalLinkageKind &linkage);
66+
/// Returns function or variable name on device side even if the current
67+
/// compilation is for host.
68+
virtual std::string getDeviceSideName(const NamedDecl *nd);
6369
};
6470

6571
} // namespace clang::CIRGen

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,25 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const {
576576
global->getType()->isCUDADeviceBuiltinTextureType();
577577
}
578578

579+
void CIRGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &os,
580+
const Decl *d) const {
581+
// ptxas does not allow '.' in symbol names. On the other hand, HIP prefers
582+
// postfix beginning with '.' since the symbol name can be demangled.
583+
if (langOpts.HIP)
584+
os << (isa<VarDecl>(d) ? ".static." : ".intern.");
585+
else
586+
os << (isa<VarDecl>(d) ? "__static__" : "__intern__");
587+
588+
// If the CUID is not specified we try to generate a unique postfix.
589+
if (getLangOpts().CUID.empty()) {
590+
// TODO: Once we add 'PreprocessorOpts' into CIRGenModule this part can be
591+
// brought in from OG.
592+
llvm_unreachable("NYI");
593+
} else {
594+
os << getASTContext().getCUIDHash();
595+
}
596+
}
597+
579598
void CIRGenModule::emitGlobal(GlobalDecl gd) {
580599
llvm::TimeTraceScope scope("build CIR Global", [&]() -> std::string {
581600
auto *nd = dyn_cast<NamedDecl>(gd.getDecl());
@@ -1496,6 +1515,32 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *d,
14961515
}
14971516
}
14981517

1518+
// Decorate CUDA shadow variables with the cu.shadow_name attribute so we know
1519+
// how to register them when lowering.
1520+
if (langOpts.CUDA && !langOpts.CUDAIsDevice &&
1521+
(d->hasAttr<CUDAConstantAttr>() || d->hasAttr<CUDADeviceAttr>())) {
1522+
// Shadow variables and their properties must be registered with CUDA
1523+
// runtime. Skip Extern global variables, which will be registered in
1524+
// the TU where they are defined.
1525+
//
1526+
// Don't register a C++17 inline variable. The local symbol can be
1527+
// discarded and referencing a discarded local symbol from outside the
1528+
// comdat (__cuda_register_globals) is disallowed by the ELF spec.
1529+
//
1530+
// HIP managed variables need to be always recorded in device and host
1531+
// compilations for transformation.
1532+
//
1533+
// HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
1534+
// added to llvm.compiler-used, therefore they are safe to be registered.
1535+
if ((!d->hasExternalStorage() && !d->isInline()) ||
1536+
getASTContext().CUDADeviceVarODRUsedByHost.contains(d) ||
1537+
d->hasAttr<HIPManagedAttr>()) {
1538+
auto shadowName = cudaRuntime->getDeviceSideName(cast<NamedDecl>(d));
1539+
auto attr = CUDAShadowNameAttr::get(&getMLIRContext(), shadowName);
1540+
gv->setAttr(CUDAShadowNameAttr::getMnemonic(), attr);
1541+
}
1542+
}
1543+
14991544
// Set initializer and finalize emission
15001545
CIRGenModule::setInitializer(gv, init);
15011546
if (emitter)

clang/lib/CIR/CodeGen/CIRGenModule.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,14 @@ class CIRGenModule : public CIRGenTypeCache {
274274

275275
bool shouldEmitCUDAGlobalVar(const VarDecl *global) const;
276276

277+
/// Print the postfix for externalized static variable or kernels for single
278+
/// source offloading languages CUDA and HIP. The unique postfix is created
279+
/// using either the CUID argument, or the file's UniqueID and active macros.
280+
/// The fallback method without a CUID requires that the offloading toolchain
281+
/// does not define separate macros via the -cc1 options.
282+
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
283+
const Decl *D) const;
284+
277285
bool shouldEmitConvergenceTokens() const {
278286
// TODO: this shuld probably become unconditional once the controlled
279287
// convergence becomes the norm.

clang/test/CIR/CodeGen/CUDA/global-vars.cu

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,15 @@
1010
// RUN: %s -o %t.cir
1111
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.cir %s
1212

13+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
14+
// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
15+
// RUN: %s -o %t.cir
16+
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
17+
1318
__device__ int a;
1419
// CIR-DEVICE: cir.global external addrspace(offload_global) @a = #cir.int<0>
1520
// LLVM-DEVICE: @a = addrspace(1) externally_initialized global i32 0, align 4
21+
// CIR-HOST: {{.*}}cir.global external @a = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<a>}{{.*}}
1622

1723
__shared__ int shared;
1824
// CIR-DEVICE: cir.global external addrspace(offload_local) @shared = #cir.undef
@@ -21,3 +27,4 @@ __shared__ int shared;
2127
__constant__ int b;
2228
// CIR-DEVICE: cir.global constant external addrspace(offload_constant) @b = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
2329
// LLVM-DEVICE: @b = addrspace(4) externally_initialized constant i32 0, align 4
30+
// CIR-HOST: {{.*}}cir.global external @b = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<b>}{{.*}}

0 commit comments

Comments
 (0)