Skip to content

Commit e6de9ed

Browse files
yxsamliutstellar
authored andcommitted
[CUDA][HIP] Externalize kernels in anonymous name space
kernels in anonymous name space needs to have unique name to avoid duplicate symbols. Fixes: #54560 Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D123353 (cherry picked from commit 4ea1d43)
1 parent fecfc83 commit e6de9ed

File tree

6 files changed

+43
-15
lines changed

6 files changed

+43
-15
lines changed

clang/include/clang/AST/ASTContext.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3279,10 +3279,10 @@ OPT_LIST(V)
32793279
/// Return a new OMPTraitInfo object owned by this context.
32803280
OMPTraitInfo &getNewOMPTraitInfo();
32813281

3282-
/// Whether a C++ static variable may be externalized.
3282+
/// Whether a C++ static variable or CUDA/HIP kernel may be externalized.
32833283
bool mayExternalizeStaticVar(const Decl *D) const;
32843284

3285-
/// Whether a C++ static variable should be externalized.
3285+
/// Whether a C++ static variable or CUDA/HIP kernel should be externalized.
32863286
bool shouldExternalizeStaticVar(const Decl *D) const;
32873287

32883288
StringRef getCUIDHash() const;

clang/lib/AST/ASTContext.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12263,14 +12263,16 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
1226312263
(D->hasAttr<CUDAConstantAttr>() &&
1226412264
!D->getAttr<CUDAConstantAttr>()->isImplicit());
1226512265
// CUDA/HIP: static managed variables need to be externalized since it is
12266-
// a declaration in IR, therefore cannot have internal linkage.
12267-
return IsStaticVar &&
12268-
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
12266+
// a declaration in IR, therefore cannot have internal linkage. Kernels in
12267+
// anonymous name space needs to be externalized to avoid duplicate symbols.
12268+
return (IsStaticVar &&
12269+
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
12270+
(D->hasAttr<CUDAGlobalAttr>() && D->isInAnonymousNamespace());
1226912271
}
1227012272

1227112273
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
1227212274
return mayExternalizeStaticVar(D) &&
12273-
(D->hasAttr<HIPManagedAttr>() ||
12275+
(D->hasAttr<HIPManagedAttr>() || D->hasAttr<CUDAGlobalAttr>() ||
1227412276
CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
1227512277
}
1227612278

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
287287
SmallString<256> Buffer;
288288
llvm::raw_svector_ostream Out(Buffer);
289289
Out << DeviceSideName;
290-
CGM.printPostfixForExternalizedStaticVar(Out);
290+
CGM.printPostfixForExternalizedDecl(Out, ND);
291291
DeviceSideName = std::string(Out.str());
292292
}
293293
return DeviceSideName;

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,7 +1367,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
13671367
if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
13681368
CGM.getLangOpts().GPURelocatableDeviceCode &&
13691369
CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty())
1370-
CGM.printPostfixForExternalizedStaticVar(Out);
1370+
CGM.printPostfixForExternalizedDecl(Out, ND);
13711371
return std::string(Out.str());
13721372
}
13731373

@@ -1455,7 +1455,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
14551455
// directly between host- and device-compilations, the host- and
14561456
// device-mangling in host compilation could help catching certain ones.
14571457
assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() ||
1458-
getLangOpts().CUDAIsDevice ||
1458+
getContext().shouldExternalizeStaticVar(ND) || getLangOpts().CUDAIsDevice ||
14591459
(getContext().getAuxTargetInfo() &&
14601460
(getContext().getAuxTargetInfo()->getCXXABI() !=
14611461
getContext().getTargetInfo().getCXXABI())) ||
@@ -6645,7 +6645,8 @@ bool CodeGenModule::stopAutoInit() {
66456645
return false;
66466646
}
66476647

6648-
void CodeGenModule::printPostfixForExternalizedStaticVar(
6649-
llvm::raw_ostream &OS) const {
6650-
OS << "__static__" << getContext().getCUIDHash();
6648+
void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
6649+
const Decl *D) const {
6650+
OS << (isa<VarDecl>(D) ? "__static__" : ".anon.")
6651+
<< getContext().getCUIDHash();
66516652
}

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,9 +1447,10 @@ class CodeGenModule : public CodeGenTypeCache {
14471447
TBAAAccessInfo *TBAAInfo = nullptr);
14481448
bool stopAutoInit();
14491449

1450-
/// Print the postfix for externalized static variable for single source
1451-
/// offloading languages CUDA and HIP.
1452-
void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
1450+
/// Print the postfix for externalized static variable or kernels for single
1451+
/// source offloading languages CUDA and HIP.
1452+
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
1453+
const Decl *D) const;
14531454

14541455
private:
14551456
llvm::Constant *GetOrCreateLLVMFunction(
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
2+
// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
3+
// RUN: -emit-llvm -o - -x hip %s > %t.dev
4+
5+
// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
6+
// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
7+
// RUN: -emit-llvm -o - -x hip %s > %t.host
8+
9+
// RUN: cat %t.dev %t.host | FileCheck %s
10+
11+
#include "Inputs/cuda.h"
12+
13+
// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]](
14+
// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00"
15+
// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]]
16+
17+
namespace {
18+
__global__ void kernel() {
19+
}
20+
}
21+
22+
void test() {
23+
kernel<<<1, 1>>>();
24+
}

0 commit comments

Comments
 (0)