-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[OpenMP][clang][HIP][CUDA] fix weak alias emit on device compilation #164326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
26b90e0
16c1a68
73f219f
7fd51cc
2949485
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4065,8 +4065,40 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { | |
|
|
||
| // If this is an alias definition (which otherwise looks like a declaration) | ||
| // emit it now. | ||
| if (Global->hasAttr<AliasAttr>()) | ||
| if (Global->hasAttr<AliasAttr>()) { | ||
| if (LangOpts.OpenMPIsTargetDevice || LangOpts.CUDA) { | ||
| const auto *AA = Global->getAttr<AliasAttr>(); | ||
| assert(AA && "Not an alias?"); | ||
|
||
| GlobalDecl AliaseeGD; | ||
| if (!lookupRepresentativeDecl(AA->getAliasee(), AliaseeGD)) { | ||
| if (LangOpts.CUDA) | ||
| // Failed to find aliasee on device side, skip emitting | ||
|
||
| return; | ||
| } else { | ||
| const auto *AliaseeDecl = dyn_cast<ValueDecl>(AliaseeGD.getDecl()); | ||
| if (LangOpts.OpenMPIsTargetDevice) { | ||
| if (!AliaseeDecl || | ||
| !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( | ||
| AliaseeDecl)) | ||
| // Not a target declaration, skip emitting | ||
| return; | ||
| } else { | ||
| // HIP/CUDA | ||
| const bool HasDeviceAttr = Global->hasAttr<CUDADeviceAttr>(); | ||
| const bool AliaseeHasDeviceAttr = | ||
| AliaseeDecl && AliaseeDecl->hasAttr<CUDADeviceAttr>(); | ||
| if (LangOpts.CUDAIsDevice) { | ||
| if (!HasDeviceAttr || !AliaseeHasDeviceAttr) | ||
| return; | ||
| } else if (HasDeviceAttr && AliaseeHasDeviceAttr) { | ||
| // Alias is only on device side, skip emitting on host side | ||
| return; | ||
|
||
| } | ||
| } | ||
| } | ||
| } | ||
| return EmitAliasDefinition(GD); | ||
| } | ||
|
|
||
| // IFunc like an alias whose value is resolved at runtime by calling resolver. | ||
| if (Global->hasAttr<IFuncAttr>()) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,36 @@ | ||||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 | ||||
| // REQUIRES: nvptx-registered-target | ||||
|
||||
| // REQUIRES: nvptx-registered-target |
This shouldn't be necessary since we're not hitting PTX codegen, here and elsewhere. Also I know these tests are autogenerated, but we don't need to check the attributes. Totally find to trim things from the autogenerated output.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Trimmed up the checks, in 2949485, I kept the function definitions as they technically relate to the issue, but I also see an argument to also removing those.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why main?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed in: 16c1a68
I'm still new to LIT tests, did not know main() was not needed.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 | ||
| // REQUIRES: amdgpu-registered-target | ||
| // RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc | ||
| // RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST | ||
| // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm %s -fcuda-is-device -o - | FileCheck %s --check-prefix=DEVICE | ||
|
|
||
| #define __device__ __attribute__((device)) | ||
|
|
||
| extern "C" { | ||
|
|
||
| //. | ||
| // HOST: @__hip_cuid_ = global i8 0 | ||
| // HOST: @llvm.compiler.used = appending global [1 x ptr] [ptr @__hip_cuid_], section "llvm.metadata" | ||
| // HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc | ||
| //. | ||
| // DEVICE: @__hip_cuid_ = addrspace(1) global i8 0 | ||
| // DEVICE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" | ||
| // DEVICE: @One = weak alias i32 (), ptr @__One | ||
| //. | ||
| // HOST-LABEL: define dso_local i32 @__HostFunc( | ||
| // HOST-SAME: ) #[[ATTR0:[0-9]+]] { | ||
| // HOST-NEXT: [[ENTRY:.*:]] | ||
| // HOST-NEXT: ret i32 42 | ||
| // | ||
| int __HostFunc(void) { return 42; } | ||
| int HostFunc(void) __attribute__ ((weak, alias("__HostFunc"))); | ||
|
|
||
| // DEVICE-LABEL: define dso_local i32 @__One( | ||
| // DEVICE-SAME: ) #[[ATTR0:[0-9]+]] { | ||
| // DEVICE-NEXT: [[ENTRY:.*:]] | ||
| // DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) | ||
| // DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr | ||
| // DEVICE-NEXT: ret i32 2 | ||
| // | ||
| __device__ int __One(void) { return 2; } | ||
| __device__ int One(void) __attribute__ ((weak, alias("__One"))); | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add lit tests for:
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added the cases for HIP and amdgcn for OpenMP in 16c1a68, The cases work without modification of the patch. |
||
| } | ||
|
|
||
| // HOST-LABEL: define dso_local noundef i32 @main( | ||
| // HOST-SAME: ) #[[ATTR1:[0-9]+]] { | ||
| // HOST-NEXT: [[ENTRY:.*:]] | ||
| // HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 | ||
| // HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 | ||
| // HOST-NEXT: ret i32 0 | ||
| // | ||
| int main() { | ||
| return 0; | ||
| } | ||
| //. | ||
| // HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } | ||
| // HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } | ||
| //. | ||
| // DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
| //. | ||
| // HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} | ||
| // HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} | ||
| //. | ||
| // DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} | ||
| // DEVICE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} | ||
| // DEVICE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} | ||
| // DEVICE: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} | ||
| //. | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 | ||
| // REQUIRES: amdgpu-registered-target | ||
|
|
||
| // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc | ||
| // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST | ||
| // RUN: %clang_cc1 -fopenmp -x c -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=DEVICE | ||
|
|
||
| //. | ||
| // HOST: @One = weak alias i32 (), ptr @__One | ||
| // HOST: @Two = weak alias i32 (), ptr @__Two | ||
| // HOST: @Three = weak alias i32 (), ptr @__Three | ||
| //. | ||
| // DEVICE: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0 | ||
| // DEVICE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0 | ||
| // DEVICE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0 | ||
| // DEVICE: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0 | ||
| // DEVICE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0 | ||
| // DEVICE: @Two = weak hidden alias i32 (), ptr @__Two | ||
| // DEVICE: @Three = weak hidden alias i32 (), ptr @__Three | ||
| // DEVICE: @Three.1 = weak hidden alias i32 (), ptr @__Three | ||
| //. | ||
| // HOST-LABEL: define dso_local i32 @__One( | ||
| // HOST-SAME: ) #[[ATTR0:[0-9]+]] { | ||
| // HOST-NEXT: [[ENTRY:.*:]] | ||
| // HOST-NEXT: ret i32 1 | ||
| // | ||
| int __One(void) { return 1; } | ||
| int One(void) __attribute__ ((weak, alias("__One"))); | ||
|
|
||
| #pragma omp declare target | ||
| // HOST-LABEL: define dso_local i32 @__Two( | ||
| // HOST-SAME: ) #[[ATTR0]] { | ||
| // HOST-NEXT: [[ENTRY:.*:]] | ||
| // HOST-NEXT: ret i32 2 | ||
| // | ||
| // DEVICE-LABEL: define hidden i32 @__Two( | ||
| // DEVICE-SAME: ) #[[ATTR0:[0-9]+]] { | ||
| // DEVICE-NEXT: [[ENTRY:.*:]] | ||
| // DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) | ||
| // DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr | ||
| // DEVICE-NEXT: ret i32 2 | ||
| // | ||
| int __Two(void) { return 2; } | ||
| int Two(void) __attribute__ ((weak, alias("__Two"))); | ||
| #pragma omp end declare target | ||
|
|
||
| #pragma omp declare target | ||
| // HOST-LABEL: define dso_local i32 @__Three( | ||
| // HOST-SAME: ) #[[ATTR0]] { | ||
| // HOST-NEXT: [[ENTRY:.*:]] | ||
| // HOST-NEXT: ret i32 3 | ||
| // | ||
| // DEVICE-LABEL: define hidden i32 @__Three( | ||
| // DEVICE-SAME: ) #[[ATTR0]] { | ||
| // DEVICE-NEXT: [[ENTRY:.*:]] | ||
| // DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) | ||
| // DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr | ||
| // DEVICE-NEXT: ret i32 3 | ||
| // | ||
| int __Three(void) { return 3; } | ||
| #pragma omp end declare target | ||
| int Three(void) __attribute__ ((weak, alias("__Three"))); | ||
|
|
||
|
|
||
| // HOST-LABEL: define dso_local i32 @main( | ||
| // HOST-SAME: ) #[[ATTR0]] { | ||
| // HOST-NEXT: [[ENTRY:.*:]] | ||
| // HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 | ||
| // HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 | ||
| // HOST-NEXT: ret i32 0 | ||
| // | ||
| int main(){ | ||
| return 0; | ||
| } | ||
|
|
||
| //. | ||
| // HOST: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } | ||
| //. | ||
| // DEVICE: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
| //. | ||
| // HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} | ||
| // HOST: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51} | ||
| // HOST: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} | ||
| //. | ||
| // DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} | ||
| // DEVICE: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} | ||
| // DEVICE: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 51} | ||
| // DEVICE: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 51} | ||
| // DEVICE: [[META4:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} | ||
| //. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 | ||
| // REQUIRES: nvptx-registered-target | ||
|
|
||
| // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s | ||
|
|
||
| //. | ||
| // CHECK: @One = weak alias i32 (), ptr @__One | ||
| //. | ||
| // CHECK-LABEL: define dso_local i32 @__One( | ||
| // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { | ||
| // CHECK-NEXT: [[ENTRY:.*:]] | ||
| // CHECK-NEXT: ret i32 1 | ||
| // | ||
| int __One(void) { return 1; } | ||
| int One(void) __attribute__ ((weak, alias("__One"))); | ||
|
|
||
|
|
||
| // CHECK-LABEL: define dso_local i32 @main( | ||
| // CHECK-SAME: ) #[[ATTR0]] { | ||
| // CHECK-NEXT: [[ENTRY:.*:]] | ||
| // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 | ||
| // CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 | ||
| // CHECK-NEXT: ret i32 0 | ||
| // | ||
| int main(){ | ||
| return 0; | ||
| } | ||
| //. | ||
| // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } | ||
| //. | ||
| // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} | ||
| // CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51} | ||
| // CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} | ||
| //. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All of this should be a helper function that returns a bool most likely.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Refactor done in: 16c1a68