Skip to content

Commit a2a85f2

Browse files
jdoerfert authored and tstellar committed
[Attributor][FIX] Ensure we use the right AAExecutionDomain
Before, we might have ended up querying the AAExecutionDomain of a different function, which resulted in wrong optimistic results. Partially fixes #60425 (cherry picked from commit 18a2975)
1 parent 3fae904 commit a2a85f2

File tree

3 files changed

+64
-4
lines changed

3 files changed

+64
-4
lines changed

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,12 +1043,14 @@ struct AAPointerInfoImpl
10431043
const auto &NoSyncAA = A.getAAFor<AANoSync>(
10441044
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
10451045
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
1046-
IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
1046+
IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE);
10471047
bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
10481048
bool InstIsExecutedByInitialThreadOnly =
10491049
ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I);
10501050
bool InstIsExecutedInAlignedRegion =
10511051
ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I);
1052+
if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly)
1053+
A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
10521054

10531055
InformationCache &InfoCache = A.getInfoCache();
10541056
bool IsThreadLocalObj =
@@ -1063,14 +1065,24 @@ struct AAPointerInfoImpl
10631065
auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool {
10641066
if (IsThreadLocalObj || AllInSameNoSyncFn)
10651067
return true;
1066-
if (!ExecDomainAA)
1068+
const auto *FnExecDomainAA =
1069+
I.getFunction() == &Scope
1070+
? ExecDomainAA
1071+
: A.lookupAAFor<AAExecutionDomain>(
1072+
IRPosition::function(*I.getFunction()), &QueryingAA,
1073+
DepClassTy::NONE);
1074+
if (!FnExecDomainAA)
10671075
return false;
10681076
if (InstIsExecutedInAlignedRegion ||
1069-
ExecDomainAA->isExecutedInAlignedRegion(A, I))
1077+
FnExecDomainAA->isExecutedInAlignedRegion(A, I)) {
1078+
A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
10701079
return true;
1080+
}
10711081
if (InstIsExecutedByInitialThreadOnly &&
1072-
ExecDomainAA->isExecutedByInitialThreadOnly(I))
1082+
FnExecDomainAA->isExecutedByInitialThreadOnly(I)) {
1083+
A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
10731084
return true;
1085+
}
10741086
return false;
10751087
};
10761088

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2680,6 +2680,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26802680

26812681
bool isExecutedInAlignedRegion(Attributor &A,
26822682
const Instruction &I) const override {
2683+
assert(I.getFunction() == getAnchorScope() &&
2684+
"Instruction is out of scope!");
26832685
if (!isValidState() || isa<CallBase>(I))
26842686
return false;
26852687

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs
2+
; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK
3+
4+
%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr }
5+
%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 }
6+
7+
@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
8+
9+
define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
10+
%1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
11+
ret void
12+
}
13+
14+
define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) {
15+
store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
16+
%4 = call i1 @__kmpc_kernel_parallel()
17+
ret i32 0
18+
}
19+
20+
define internal i1 @__kmpc_kernel_parallel() {
21+
%1 = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8
22+
ret i1 false
23+
}
24+
25+
!llvm.module.flags = !{!0}
26+
27+
!0 = !{i32 7, !"openmp", i32 50}
28+
;.
29+
; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
30+
;.
31+
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
32+
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
33+
; CHECK-NEXT: ret void
34+
;
35+
;
36+
; CHECK: Function Attrs: norecurse nosync nounwind memory(write)
37+
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
38+
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
39+
; CHECK-NEXT: ret i32 0
40+
;
41+
;.
42+
; CHECK: attributes #[[ATTR0]] = { norecurse nosync nounwind memory(write) }
43+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind }
44+
;.
45+
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
46+
;.

0 commit comments

Comments
 (0)