Skip to content

Commit 4394aa6

Browse files
[OpenMP][clang][HIP][CUDA] fix weak alias emit on device compilation (#164326)
This PR adds checks for when emitting weak aliases in: `void CodeGenModule::EmitGlobal(GlobalDecl GD)`, before for device compilation for OpenMP, HIP and Cuda, clang would look for the aliasee even if it was never marked for device compilation. For OpenMP the following case now works: > Failed before when compiling with device, ie: `clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa` > ``` > int __Two(void) { return 2; } > int Two(void) __attribute__ ((weak, alias("__Two"))); > ``` For HIP / Cuda: > > ``` > int __HostFunc(void) { return 42; } > int HostFunc(void) __attribute__ ((weak, alias("__HostFunc"))); > ``` For HIP: >Failed before on HIP, Cuda fails due to: `NVPTX aliasee must not be '.weak'` error > ``` > __device__ int __One(void) { return 2; } > __device__ int One(void) __attribute__ ((weak, alias("__One"))); > ``` Included are Codegen LIT tests for the above cases, and also cases for weak alias cases that currently work in clang. Fixes #117369
1 parent 6412184 commit 4394aa6

File tree

7 files changed

+455
-1
lines changed

7 files changed

+455
-1
lines changed

clang/docs/HIPSupport.rst

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,43 @@ Example Usage
376376
basePtr->virtualFunction(); // Allowed since obj is constructed in device code
377377
}
378378

379+
Alias Attribute Support
380+
=======================
381+
382+
Clang supports alias attributes in HIP code, allowing creation of alternative names for functions and variables.
383+
- Aliases work with ``__host__``, ``__device__``, and ``__host__ __device__`` functions and variables.
384+
- The alias attribute uses the syntax ``__attribute__((alias("target_name")))``. Both weak and strong aliases are supported.
385+
- Outside of ``extern "C"``, the alias target must use the mangled name of the aliasee
386+
- The alias is only emitted if the aliasee is emitted on the same side (ie __host__ or __device__), otherwise it is ignored.
387+
388+
Example Usage
389+
-------------
390+
391+
.. code-block:: c++
392+
393+
extern "C" {
394+
// Host function alias
395+
int __HostFunc(void) { return 0; }
396+
int HostFunc(void) __attribute__((weak, alias("__HostFunc")));
397+
398+
// Device function alias
399+
__device__ int __DeviceFunc(void) { return 1; }
400+
__device__ int DeviceFunc(void) __attribute__((weak, alias("__DeviceFunc")));
401+
402+
// Host-device function alias
403+
__host__ __device__ int __BothFunc(void) { return 2; }
404+
__host__ __device__ int BothFunc(void) __attribute__((alias("__BothFunc")));
405+
406+
// Variable alias
407+
int __host_var = 3;
408+
extern int __attribute__((weak, alias("__host_var"))) host_var;
409+
}
410+
// Mangled / overload alias
411+
__host__ __device__ float __Four(float f) { return 2.0f * f; }
412+
__host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
413+
__host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf")));
414+
415+
379416
Host and Device Attributes of Default Destructors
380417
===================================================
381418

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4107,6 +4107,38 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
41074107
return D->isImplicit();
41084108
}
41094109

4110+
static bool shouldSkipAliasEmission(const CodeGenModule &CGM,
4111+
const ValueDecl *Global) {
4112+
const LangOptions &LangOpts = CGM.getLangOpts();
4113+
if (!LangOpts.OpenMPIsTargetDevice && !LangOpts.CUDA)
4114+
return false;
4115+
4116+
const auto *AA = Global->getAttr<AliasAttr>();
4117+
GlobalDecl AliaseeGD;
4118+
4119+
// Check if the aliasee exists, if the aliasee is not found, skip the alias
4120+
// emission. This is executed for both the host and device.
4121+
if (!CGM.lookupRepresentativeDecl(AA->getAliasee(), AliaseeGD))
4122+
return true;
4123+
4124+
const auto *AliaseeDecl = dyn_cast<ValueDecl>(AliaseeGD.getDecl());
4125+
if (LangOpts.OpenMPIsTargetDevice)
4126+
return !AliaseeDecl ||
4127+
!OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(AliaseeDecl);
4128+
4129+
// CUDA / HIP
4130+
const bool HasDeviceAttr = Global->hasAttr<CUDADeviceAttr>();
4131+
const bool AliaseeHasDeviceAttr =
4132+
AliaseeDecl && AliaseeDecl->hasAttr<CUDADeviceAttr>();
4133+
4134+
if (LangOpts.CUDAIsDevice)
4135+
return !HasDeviceAttr || !AliaseeHasDeviceAttr;
4136+
4137+
// CUDA / HIP Host
4138+
// we know that the aliasee exists from above, so we know to emit
4139+
return false;
4140+
}
4141+
41104142
bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const {
41114143
assert(LangOpts.CUDA && "Should not be called by non-CUDA languages");
41124144
// We need to emit host-side 'shadows' for all global
@@ -4129,8 +4161,11 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
41294161

41304162
// If this is an alias definition (which otherwise looks like a declaration)
41314163
// emit it now.
4132-
if (Global->hasAttr<AliasAttr>())
4164+
if (Global->hasAttr<AliasAttr>()) {
4165+
if (shouldSkipAliasEmission(*this, Global))
4166+
return;
41334167
return EmitAliasDefinition(GD);
4168+
}
41344169

41354170
// IFunc like an alias whose value is resolved at runtime by calling resolver.
41364171
if (Global->hasAttr<IFuncAttr>())
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
2+
// RUN: %clang_cc1 -x cuda -triple x86_64-unknown-linux-gnu -aux-triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
3+
4+
extern "C" {
5+
6+
//.
7+
// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc
8+
//.
9+
// HOST-LABEL: define dso_local i32 @__HostFunc(
10+
// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
11+
// HOST-NEXT: [[ENTRY:.*:]]
12+
// HOST-NEXT: ret i32 42
13+
//
14+
int __HostFunc(void) { return 42; }
15+
int HostFunc(void) __attribute__ ((weak, alias("__HostFunc")));
16+
17+
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
2+
// REQUIRES: amdgpu-registered-target
3+
// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
4+
// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
5+
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm %s -fcuda-is-device -o - | FileCheck %s --check-prefix=DEVICE
6+
7+
#define __device__ __attribute__((device))
8+
#define __host__ __attribute__((host))
9+
10+
extern "C" {
11+
//.
12+
// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc
13+
// HOST: @HostFunc_ = alias i32 (), ptr @__HostFunc
14+
// HOST: @HostVar = weak alias i32, ptr @__HostVar
15+
// HOST: @HostVar_ = alias i32, ptr @__HostVar
16+
// HOST: @Two = weak alias i32 (), ptr @__Two
17+
// HOST: @Two_ = alias i32 (), ptr @__Two
18+
// HOST: @_Z5Threev = weak alias i32 (), ptr @_Z7__Threev
19+
// HOST: @_Z6Three_v = alias i32 (), ptr @_Z7__Threev
20+
// HOST: @_Z4Fourv = weak alias i32 (), ptr @_Z6__Fourv
21+
// HOST: @_Z4Fourf = weak alias float (float), ptr @_Z6__Fourf
22+
//.
23+
// DEVICE: @One = weak alias i32 (), ptr @__One
24+
// DEVICE: @One_ = alias i32 (), ptr @__One
25+
// DEVICE: @Two = weak alias i32 (), ptr @__Two
26+
// DEVICE: @Two_ = alias i32 (), ptr @__Two
27+
// DEVICE: @_Z5Threev = weak alias i32 (), ptr @_Z7__Threev
28+
// DEVICE: @_Z6Three_v = alias i32 (), ptr @_Z7__Threev
29+
// DEVICE: @_Z4Fourv = weak alias i32 (), ptr @_Z6__Fourv
30+
// DEVICE: @_Z4Fourf = weak alias float (float), ptr @_Z6__Fourf
31+
//.
32+
// HOST-LABEL: define dso_local i32 @__HostFunc(
33+
// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
34+
// HOST-NEXT: [[ENTRY:.*:]]
35+
// HOST-NEXT: ret i32 42
36+
//
37+
int __HostFunc(void) { return 42; }
38+
int __HostVar = 1;
39+
int HostFunc(void) __attribute__((weak, alias("__HostFunc")));
40+
int HostFunc_(void) __attribute__((alias("__HostFunc")));
41+
extern int __attribute__((weak, alias("__HostVar"))) HostVar;
42+
extern int __attribute__((alias("__HostVar"))) HostVar_;
43+
44+
// DEVICE-LABEL: define dso_local i32 @__One(
45+
// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
46+
// DEVICE-NEXT: [[ENTRY:.*:]]
47+
// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
48+
// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
49+
// DEVICE-NEXT: ret i32 1
50+
//
51+
__device__ int __One(void) { return 1; }
52+
__device__ int One(void) __attribute__((weak, alias("__One")));
53+
__device__ int One_(void) __attribute__((alias("__One")));
54+
55+
// HOST-LABEL: define dso_local i32 @__Two(
56+
// HOST-SAME: ) #[[ATTR0]] {
57+
// HOST-NEXT: [[ENTRY:.*:]]
58+
// HOST-NEXT: ret i32 2
59+
//
60+
// DEVICE-LABEL: define dso_local i32 @__Two(
61+
// DEVICE-SAME: ) #[[ATTR0]] {
62+
// DEVICE-NEXT: [[ENTRY:.*:]]
63+
// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
64+
// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
65+
// DEVICE-NEXT: ret i32 2
66+
//
67+
__host__ __device__ int __Two(void) { return 2; }
68+
__host__ __device__ int Two(void) __attribute__((weak, alias("__Two")));
69+
__host__ __device__ int Two_(void) __attribute__((alias("__Two")));
70+
}
71+
72+
// HOST-LABEL: define linkonce_odr noundef i32 @_Z7__Threev(
73+
// HOST-SAME: ) #[[ATTR0]] comdat {
74+
// HOST-NEXT: [[ENTRY:.*:]]
75+
// HOST-NEXT: ret i32 5
76+
//
77+
// DEVICE-LABEL: define linkonce_odr noundef i32 @_Z7__Threev(
78+
// DEVICE-SAME: ) #[[ATTR0]] comdat {
79+
// DEVICE-NEXT: [[ENTRY:.*:]]
80+
// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
81+
// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
82+
// DEVICE-NEXT: ret i32 5
83+
//
84+
__host__ __device__ constexpr int __Three(void) { return 5; }
85+
__host__ __device__ int Three(void) __attribute__((weak, alias("_Z7__Threev")));
86+
__host__ __device__ int Three_(void) __attribute__((alias("_Z7__Threev")));
87+
88+
89+
// HOST-LABEL: define dso_local noundef i32 @_Z6__Fourv(
90+
// HOST-SAME: ) #[[ATTR0]] {
91+
// HOST-NEXT: [[ENTRY:.*:]]
92+
// HOST-NEXT: ret i32 2
93+
//
94+
// DEVICE-LABEL: define dso_local noundef i32 @_Z6__Fourv(
95+
// DEVICE-SAME: ) #[[ATTR0]] {
96+
// DEVICE-NEXT: [[ENTRY:.*:]]
97+
// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
98+
// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
99+
// DEVICE-NEXT: ret i32 2
100+
//
101+
__host__ __device__ int __Four(void) { return 2; }
102+
// HOST-LABEL: define dso_local noundef float @_Z6__Fourf(
103+
// HOST-SAME: float noundef [[F:%.*]]) #[[ATTR0]] {
104+
// HOST-NEXT: [[ENTRY:.*:]]
105+
// HOST-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
106+
// HOST-NEXT: store float [[F]], ptr [[F_ADDR]], align 4
107+
// HOST-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
108+
// HOST-NEXT: [[MUL:%.*]] = fmul contract float 2.000000e+00, [[TMP0]]
109+
// HOST-NEXT: ret float [[MUL]]
110+
//
111+
// DEVICE-LABEL: define dso_local noundef float @_Z6__Fourf(
112+
// DEVICE-SAME: float noundef [[F:%.*]]) #[[ATTR0]] {
113+
// DEVICE-NEXT: [[ENTRY:.*:]]
114+
// DEVICE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
115+
// DEVICE-NEXT: [[F_ADDR:%.*]] = alloca float, align 4, addrspace(5)
116+
// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
117+
// DEVICE-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
118+
// DEVICE-NEXT: store float [[F]], ptr [[F_ADDR_ASCAST]], align 4
119+
// DEVICE-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR_ASCAST]], align 4
120+
// DEVICE-NEXT: [[MUL:%.*]] = fmul contract float 2.000000e+00, [[TMP0]]
121+
// DEVICE-NEXT: ret float [[MUL]]
122+
//
123+
__host__ __device__ float __Four(float f) { return 2.0f * f; }
124+
__host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
125+
__host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf")));
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
2+
// REQUIRES: amdgpu-registered-target
3+
4+
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
5+
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
6+
// RUN: %clang_cc1 -fopenmp -x c -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=DEVICE
7+
8+
//.
9+
// HOST: @__One_var = global i32 1, align 4
10+
// HOST: @__Two_var = global i32 2, align 4
11+
// HOST: @__Three_var = global i32 3, align 4
12+
// HOST: @.offloading.entry_name = internal unnamed_addr constant [10 x i8] c"__Two_var\00", section ".llvm.rodata.offloading", align 1
13+
// HOST: @.offloading.entry.__Two_var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @__Two_var, ptr @.offloading.entry_name, i64 4, i64 0, ptr null }, section "llvm_offload_entries", align 8
14+
// HOST: @.offloading.entry_name.1 = internal unnamed_addr constant [12 x i8] c"__Three_var\00", section ".llvm.rodata.offloading", align 1
15+
// HOST: @.offloading.entry.__Three_var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @__Three_var, ptr @.offloading.entry_name.1, i64 4, i64 0, ptr null }, section "llvm_offload_entries", align 8
16+
// HOST: @One = weak alias i32 (), ptr @__One
17+
// HOST: @One_ = alias i32 (), ptr @__One
18+
// HOST: @One_var = weak alias i32, ptr @__One_var
19+
// HOST: @One_var_ = alias i32, ptr @__One_var
20+
// HOST: @Two = weak alias i32 (), ptr @__Two
21+
// HOST: @Two_ = alias i32 (), ptr @__Two
22+
// HOST: @Two_var = weak alias i32, ptr @__Two_var
23+
// HOST: @Two_var_ = alias i32, ptr @__Two_var
24+
// HOST: @Three = weak alias i32 (), ptr @__Three
25+
// HOST: @Three_ = alias i32 (), ptr @__Three
26+
// HOST: @Three_var = weak alias i32, ptr @__Three_var
27+
// HOST: @Three_var_ = alias i32, ptr @__Three_var
28+
//.
29+
// DEVICE: @__Two_var = addrspace(1) global i32 2, align 4
30+
// DEVICE: @__Three_var = addrspace(1) global i32 3, align 4
31+
// DEVICE: @Two = weak hidden alias i32 (), ptr @__Two
32+
// DEVICE: @Two_ = hidden alias i32 (), ptr @__Two
33+
// DEVICE: @Two_var = weak alias i32, addrspacecast (ptr addrspace(1) @__Two_var to ptr)
34+
// DEVICE: @Two_var_ = alias i32, addrspacecast (ptr addrspace(1) @__Two_var to ptr)
35+
// DEVICE: @Three = weak hidden alias i32 (), ptr @__Three
36+
// DEVICE: @Three.1 = weak hidden alias i32 (), ptr @__Three
37+
// DEVICE: @Three_ = hidden alias i32 (), ptr @__Three
38+
// DEVICE: @Three_.2 = hidden alias i32 (), ptr @__Three
39+
// DEVICE: @Three_var = weak alias i32, addrspacecast (ptr addrspace(1) @__Three_var to ptr)
40+
// DEVICE: @Three_var_ = alias i32, addrspacecast (ptr addrspace(1) @__Three_var to ptr)
41+
//.
42+
// HOST-LABEL: define dso_local i32 @__One(
43+
// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
44+
// HOST-NEXT: [[ENTRY:.*:]]
45+
// HOST-NEXT: ret i32 1
46+
//
47+
int __One(void) { return 1; }
48+
int One(void) __attribute__ ((weak, alias("__One")));
49+
int One_(void) __attribute__ ((alias("__One")));
50+
51+
int __One_var = 1;
52+
extern int __attribute__((weak, alias("__One_var"))) One_var;
53+
extern int __attribute__((alias("__One_var"))) One_var_;
54+
55+
#pragma omp declare target
56+
// HOST-LABEL: define dso_local i32 @__Two(
57+
// HOST-SAME: ) #[[ATTR0]] {
58+
// HOST-NEXT: [[ENTRY:.*:]]
59+
// HOST-NEXT: ret i32 2
60+
//
61+
// DEVICE-LABEL: define hidden i32 @__Two(
62+
// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
63+
// DEVICE-NEXT: [[ENTRY:.*:]]
64+
// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
65+
// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
66+
// DEVICE-NEXT: ret i32 2
67+
//
68+
int __Two(void) { return 2; }
69+
int Two(void) __attribute__ ((weak, alias("__Two")));
70+
int Two_(void) __attribute__ ((alias("__Two")));
71+
72+
int __Two_var = 2;
73+
extern int __attribute__((weak, alias("__Two_var"))) Two_var;
74+
extern int __attribute__((alias("__Two_var"))) Two_var_;
75+
#pragma omp end declare target
76+
77+
#pragma omp declare target
78+
// HOST-LABEL: define dso_local i32 @__Three(
79+
// HOST-SAME: ) #[[ATTR0]] {
80+
// HOST-NEXT: [[ENTRY:.*:]]
81+
// HOST-NEXT: ret i32 3
82+
//
83+
// DEVICE-LABEL: define hidden i32 @__Three(
84+
// DEVICE-SAME: ) #[[ATTR0]] {
85+
// DEVICE-NEXT: [[ENTRY:.*:]]
86+
// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
87+
// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
88+
// DEVICE-NEXT: ret i32 3
89+
//
90+
int __Three(void) { return 3; }
91+
int __Three_var = 3;
92+
#pragma omp end declare target
93+
int Three(void) __attribute__ ((weak, alias("__Three")));
94+
int Three_(void) __attribute__ ((alias("__Three")));
95+
extern int __attribute__((weak, alias("__Three_var"))) Three_var;
96+
extern int __attribute__((alias("__Three_var"))) Three_var_;
97+
//.
98+
// HOST: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0}
99+
// HOST: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1}
100+
//.
101+
// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0}
102+
// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1}
103+
//.

0 commit comments

Comments
 (0)