Skip to content

Commit bd00ad5

Browse files
jdoerferttstellar
authored andcommitted
[OpenMP][FIX] Ensure to determine aligned regions properly
There were missing checks in the aligned region code, copy-paste errors (= usage of the IsReachedFromAlignedBarrierOnly value instead of IsReachingAlignedBarrierOnly value on the forward pass), and a missing update of the call state for sync declarations and definitions. Partially fixes #60425 (cherry picked from commit 578d507)
1 parent a2a85f2 commit bd00ad5

File tree

2 files changed

+93
-24
lines changed

2 files changed

+93
-24
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,7 +2682,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26822682
const Instruction &I) const override {
26832683
assert(I.getFunction() == getAnchorScope() &&
26842684
"Instruction is out of scope!");
2685-
if (!isValidState() || isa<CallBase>(I))
2685+
if (!isValidState())
26862686
return false;
26872687

26882688
const Instruction *CurI;
@@ -2693,14 +2693,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26932693
auto *CB = dyn_cast<CallBase>(CurI);
26942694
if (!CB)
26952695
continue;
2696+
if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
2697+
break;
2698+
}
26962699
const auto &It = CEDMap.find(CB);
26972700
if (It == CEDMap.end())
26982701
continue;
2699-
if (!It->getSecond().IsReachedFromAlignedBarrierOnly)
2702+
if (!It->getSecond().IsReachingAlignedBarrierOnly)
27002703
return false;
2704+
break;
27012705
} while ((CurI = CurI->getNextNonDebugInstruction()));
27022706

2703-
if (!CurI && !BEDMap.lookup(I.getParent()).IsReachedFromAlignedBarrierOnly)
2707+
if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
27042708
return false;
27052709

27062710
// Check backward until a call or the block beginning is reached.
@@ -2709,12 +2713,16 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
27092713
auto *CB = dyn_cast<CallBase>(CurI);
27102714
if (!CB)
27112715
continue;
2716+
if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
2717+
break;
2718+
}
27122719
const auto &It = CEDMap.find(CB);
27132720
if (It == CEDMap.end())
27142721
continue;
27152722
if (!AA::isNoSyncInst(A, *CB, *this)) {
2716-
if (It->getSecond().IsReachedFromAlignedBarrierOnly)
2723+
if (It->getSecond().IsReachedFromAlignedBarrierOnly) {
27172724
break;
2725+
}
27182726
return false;
27192727
}
27202728

@@ -3010,7 +3018,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
30103018
if (EDAA.getState().isValidState()) {
30113019
const auto &CalleeED = EDAA.getFunctionExecutionDomain();
30123020
ED.IsReachedFromAlignedBarrierOnly =
3013-
CalleeED.IsReachedFromAlignedBarrierOnly;
3021+
CallED.IsReachedFromAlignedBarrierOnly =
3022+
CalleeED.IsReachedFromAlignedBarrierOnly;
30143023
AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
30153024
if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
30163025
ED.EncounteredNonLocalSideEffect |=
@@ -3025,8 +3034,9 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
30253034
continue;
30263035
}
30273036
}
3028-
ED.IsReachedFromAlignedBarrierOnly =
3029-
IsNoSync && ED.IsReachedFromAlignedBarrierOnly;
3037+
if (!IsNoSync)
3038+
ED.IsReachedFromAlignedBarrierOnly =
3039+
CallED.IsReachedFromAlignedBarrierOnly = false;
30303040
AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
30313041
ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
30323042
if (!IsNoSync)

llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Lines changed: 76 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,20 @@ target triple = "amdgcn-amd-amdhsa"
88

99
@G = internal addrspace(3) global i32 undef, align 4
1010
@H = internal addrspace(3) global i32 undef, align 4
11+
@X = internal addrspace(3) global i32 undef, align 4
1112
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
1213

1314
; Make sure we do not delete the stores to @G without also replacing the load with `1`.
1415
;.
1516
; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
1617
; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
18+
; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
1719
; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
1820
; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
1921
;.
2022
; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
2123
; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
24+
; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
2225
; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
2326
;.
2427
define void @kernel() "kernel" {
@@ -30,20 +33,17 @@ define void @kernel() "kernel" {
3033
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
3134
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
3235
; CHECK: if.then:
33-
; CHECK-NEXT: store i32 1, ptr addrspace(3) @G, align 4
3436
; CHECK-NEXT: br label [[IF_MERGE:%.*]]
3537
; CHECK: if.else:
36-
; CHECK-NEXT: call void @barrier() #[[ATTR5:[0-9]+]]
37-
; CHECK-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
38-
; CHECK-NEXT: call void @use1(i32 [[L]]) #[[ATTR5]]
39-
; CHECK-NEXT: call void @barrier() #[[ATTR5]]
38+
; CHECK-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
39+
; CHECK-NEXT: call void @use1(i32 undef) #[[ATTR6]]
40+
; CHECK-NEXT: call void @barrier() #[[ATTR6]]
4041
; CHECK-NEXT: br label [[IF_MERGE]]
4142
; CHECK: if.merge:
42-
; CHECK-NEXT: call void @use1(i32 2) #[[ATTR5]]
43+
; CHECK-NEXT: call void @use1(i32 2) #[[ATTR6]]
4344
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
4445
; CHECK: if.then2:
45-
; CHECK-NEXT: store i32 2, ptr addrspace(3) @G, align 4
46-
; CHECK-NEXT: call void @barrier() #[[ATTR5]]
46+
; CHECK-NEXT: call void @barrier() #[[ATTR6]]
4747
; CHECK-NEXT: br label [[IF_END]]
4848
; CHECK: if.end:
4949
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
@@ -87,31 +87,90 @@ define void @test_assume() {
8787
ret void
8888
}
8989

90+
; We can't ignore the sync, hence this might store 2 into %p
91+
define void @kernel2(ptr %p) "kernel" {
92+
; CHECK-LABEL: define {{[^@]+}}@kernel2
93+
; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
94+
; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4
95+
; CHECK-NEXT: call void @sync()
96+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
97+
; CHECK-NEXT: store i32 2, ptr addrspace(3) @X, align 4
98+
; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4
99+
; CHECK-NEXT: ret void
100+
;
101+
store i32 1, ptr addrspace(3) @X
102+
call void @sync()
103+
%v = load i32, ptr addrspace(3) @X
104+
store i32 2, ptr addrspace(3) @X
105+
store i32 %v, ptr %p
106+
ret void
107+
}
108+
109+
; We can't ignore the sync, hence this might store 2 into %p
110+
define void @kernel3(ptr %p) "kernel" {
111+
; TUNIT-LABEL: define {{[^@]+}}@kernel3
112+
; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
113+
; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4
114+
; TUNIT-NEXT: call void @sync_def.internalized()
115+
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
116+
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @X, align 4
117+
; TUNIT-NEXT: store i32 [[V]], ptr [[P]], align 4
118+
; TUNIT-NEXT: ret void
119+
;
120+
; CGSCC-LABEL: define {{[^@]+}}@kernel3
121+
; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
122+
; CGSCC-NEXT: store i32 1, ptr addrspace(3) @X, align 4
123+
; CGSCC-NEXT: call void @sync_def()
124+
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
125+
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @X, align 4
126+
; CGSCC-NEXT: store i32 [[V]], ptr [[P]], align 4
127+
; CGSCC-NEXT: ret void
128+
;
129+
store i32 1, ptr addrspace(3) @X
130+
call void @sync_def()
131+
%v = load i32, ptr addrspace(3) @X
132+
store i32 2, ptr addrspace(3) @X
133+
store i32 %v, ptr %p
134+
ret void
135+
}
136+
137+
define void @sync_def() {
138+
; CHECK-LABEL: define {{[^@]+}}@sync_def() {
139+
; CHECK-NEXT: call void @sync()
140+
; CHECK-NEXT: ret void
141+
;
142+
call void @sync()
143+
ret void
144+
}
145+
146+
declare void @sync()
90147
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
91148
declare void @use1(i32) nosync norecurse nounwind nocallback
92149
declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback
93150
declare void @__kmpc_target_deinit(ptr, i8) nocallback
94151
declare void @llvm.assume(i1)
95152

96153
!llvm.module.flags = !{!0, !1}
97-
!nvvm.annotations = !{!2}
154+
!nvvm.annotations = !{!2, !3, !4}
98155

99156
!0 = !{i32 7, !"openmp", i32 50}
100157
!1 = !{i32 7, !"openmp-device", i32 50}
101158
!2 = !{ptr @kernel, !"kernel", i32 1}
159+
!3 = !{ptr @kernel2, !"kernel", i32 1}
160+
!4 = !{ptr @kernel3, !"kernel", i32 1}
102161

103162
;.
104163
; CHECK: attributes #[[ATTR0]] = { norecurse "kernel" }
105-
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
106-
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
107-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback }
108-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
109-
; CHECK: attributes #[[ATTR5]] = { nounwind }
164+
; CHECK: attributes #[[ATTR1]] = { "kernel" }
165+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
166+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
167+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback }
168+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
169+
; CHECK: attributes #[[ATTR6]] = { nounwind }
110170
;.
111171
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
112172
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
113173
; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
174+
; CHECK: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
175+
; CHECK: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
114176
;.
115-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
116-
; CGSCC: {{.*}}
117-
; TUNIT: {{.*}}

0 commit comments

Comments
 (0)