Skip to content

Commit 1a60905

Browse files
authored
[AArch64][InstCombine] Eliminate redundant barrier intrinsics (#112023)
If there are no memory ops on the path from one dmb to another then one barrier can be eliminated.
1 parent 8f8d5f0 commit 1a60905

File tree

2 files changed

+251
-0
lines changed

2 files changed

+251
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ static cl::opt<unsigned>
6666
BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
6767
cl::desc("The cost of a histcnt instruction"));
6868

69+
// Hidden command-line option (default 10) that bounds how many instructions
// instCombineDMB scans forward when searching for a redundant dmb.
static cl::opt<unsigned> DMBLookaheadThreshold(
70+
"dmb-lookahead-threshold", cl::init(10), cl::Hidden,
71+
cl::desc("The number of instructions to search for a redundant dmb"));
72+
6973
namespace {
7074
class TailFoldingOption {
7175
// These bitfields will only ever be set to something non-zero in operator=,
@@ -2152,13 +2156,40 @@ static std::optional<Instruction *> instCombineSVEInsr(InstCombiner &IC,
21522156
return std::nullopt;
21532157
}
21542158

2159+
/// Try to remove the barrier intrinsic call \p II when a *different* but
/// identical barrier is reached after it with nothing in between that touches
/// memory or has side effects.
///
/// Starting at the instruction following \p II, the walk steps forward over at
/// most DMBLookaheadThreshold instructions that neither read/write memory nor
/// have side effects. When it runs off the end of a basic block it continues
/// into that block's unique successor, if there is one; otherwise it gives up.
/// If the instruction the walk stops on is an intrinsic call identical to
/// \p II (and is not \p II itself), \p II is redundant and is erased.
///
/// \param IC  The InstCombiner used to erase the redundant barrier.
/// \param II  The barrier intrinsic call being examined.
/// \returns   The result of IC.eraseInstFromFunction(II) when \p II was
///            removed, std::nullopt when no change was made.
static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
                                                   IntrinsicInst &II) {
  // An instruction may sit between two barriers only if it cannot be affected
  // by them: no memory access and no side effects.
  auto CanSkipOver = [](Instruction *I) {
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  };

  auto *NI = II.getNextNonDebugInstruction();
  unsigned LookaheadThreshold = DMBLookaheadThreshold;
  // The null check keeps CanSkipOver from dereferencing a missing instruction
  // should a block ever yield no candidate.
  while (LookaheadThreshold-- && NI && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNonDebugInstruction();
    if (NI)
      continue;

    // Ran off the end of the block: the search may only continue when every
    // path out of the block leads to the same successor.
    auto *SuccBB = NIBB->getUniqueSuccessor();
    if (!SuccBB)
      break;
    NI = SuccBB->getFirstNonPHIOrDbgOrLifetime();
  }

  auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
  // Following unique-successor edges can lead around a CFG cycle back to II.
  // II is trivially identical to itself, but that is not a second barrier, so
  // it must not be treated as making II redundant.
  if (NextII && NextII != &II && II.isIdenticalTo(NextII))
    return IC.eraseInstFromFunction(II);

  return std::nullopt;
}
2183+
21552184
std::optional<Instruction *>
21562185
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21572186
IntrinsicInst &II) const {
21582187
Intrinsic::ID IID = II.getIntrinsicID();
21592188
switch (IID) {
21602189
default:
21612190
break;
2191+
case Intrinsic::aarch64_dmb:
2192+
return instCombineDMB(IC, II);
21622193
case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
21632194
case Intrinsic::aarch64_sve_fcvt_f16f32:
21642195
case Intrinsic::aarch64_sve_fcvt_f16f64:
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
3+
; ARM64 dmb intrinsics
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
declare void @llvm.aarch64.dmb(i32)
8+
declare void @llvm.aarch64.dsb(i32)
9+
declare void @clobber()
10+
declare void @pure() memory(none) willreturn nounwind
11+
declare i32 @llvm.ctlz.i32(i32, i1)
12+
13+
; Two identical dmb calls back to back: the expected output keeps a single dmb.
define void @simple() #0 {
14+
; CHECK-LABEL: define void @simple() {
15+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
16+
; CHECK-NEXT: ret void
17+
;
18+
call void @llvm.aarch64.dmb(i32 10)
19+
call void @llvm.aarch64.dmb(i32 10)
20+
ret void
21+
}
22+
23+
; dmb ish (0xb) is technically stronger than ishst (0xa) but we don't merge for now
24+
; Two dmb calls with different operands (10 and 11): the expected output keeps
; both of them.
define void @simple_nonmatching() #0 {
25+
; CHECK-LABEL: define void @simple_nonmatching() {
26+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
27+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 11)
28+
; CHECK-NEXT: ret void
29+
;
30+
call void @llvm.aarch64.dmb(i32 10)
31+
call void @llvm.aarch64.dmb(i32 11)
32+
ret void
33+
}
34+
35+
; A getelementptr sits between two identical dmb calls: the expected output
; keeps only one dmb, placed after the getelementptr.
define ptr @simple_safe_instruction(ptr %p) #0 {
36+
; CHECK-LABEL: define ptr @simple_safe_instruction(
37+
; CHECK-SAME: ptr [[P:%.*]]) {
38+
; CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
39+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
40+
; CHECK-NEXT: ret ptr [[RES]]
41+
;
42+
call void @llvm.aarch64.dmb(i32 10)
43+
%res = getelementptr inbounds i8, ptr %p, i32 8
44+
call void @llvm.aarch64.dmb(i32 10)
45+
ret ptr %res
46+
}
47+
48+
; A call to llvm.ctlz.i32 sits between two identical dmb calls: the expected
; output keeps only one dmb, placed after the ctlz call.
define i32 @simple_safe_intrinsic(i32 %n) #0 {
49+
; CHECK-LABEL: define i32 @simple_safe_intrinsic(
50+
; CHECK-SAME: i32 [[N:%.*]]) {
51+
; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[N]], i1 false)
52+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
53+
; CHECK-NEXT: ret i32 [[RES]]
54+
;
55+
call void @llvm.aarch64.dmb(i32 10)
56+
%res = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
57+
call void @llvm.aarch64.dmb(i32 10)
58+
ret i32 %res
59+
}
60+
61+
; A dsb intrinsic call sits between two identical dmb calls: the expected
; output keeps all three calls.
define void @simple_unsafe_intrinsic() #0 {
62+
; CHECK-LABEL: define void @simple_unsafe_intrinsic() {
63+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
64+
; CHECK-NEXT: call void @llvm.aarch64.dsb(i32 10)
65+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
66+
; CHECK-NEXT: ret void
67+
;
68+
call void @llvm.aarch64.dmb(i32 10)
69+
call void @llvm.aarch64.dsb(i32 10)
70+
call void @llvm.aarch64.dmb(i32 10)
71+
ret void
72+
}
73+
74+
; A store sits between two identical dmb calls: the expected output keeps both
; barriers around the store.
define void @simple_safe_unsafe_instruction(ptr %p) #0 {
75+
; CHECK-LABEL: define void @simple_safe_unsafe_instruction(
76+
; CHECK-SAME: ptr [[P:%.*]]) {
77+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
78+
; CHECK-NEXT: store i32 42, ptr [[P]], align 4
79+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
80+
; CHECK-NEXT: ret void
81+
;
82+
call void @llvm.aarch64.dmb(i32 10)
83+
store i32 42, ptr %p
84+
call void @llvm.aarch64.dmb(i32 10)
85+
ret void
86+
}
87+
88+
; A call to @clobber (declared with no memory attributes) sits between two
; identical dmb calls: the expected output keeps both barriers and the call.
define void @simple_safe_unsafe_call(ptr %p) #0 {
89+
; CHECK-LABEL: define void @simple_safe_unsafe_call(
90+
; CHECK-SAME: ptr [[P:%.*]]) {
91+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
92+
; CHECK-NEXT: call void @clobber()
93+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
94+
; CHECK-NEXT: ret void
95+
;
96+
call void @llvm.aarch64.dmb(i32 10)
97+
call void @clobber()
98+
call void @llvm.aarch64.dmb(i32 10)
99+
ret void
100+
}
101+
102+
; A call to @pure (declared memory(none) willreturn nounwind) sits between two
; identical dmb calls: the expected output contains a single dmb and no call
; to @pure.
define void @simple_safe_safe_call(ptr %p) #0 {
103+
; CHECK-LABEL: define void @simple_safe_safe_call(
104+
; CHECK-SAME: ptr [[P:%.*]]) {
105+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
106+
; CHECK-NEXT: ret void
107+
;
108+
call void @llvm.aarch64.dmb(i32 10)
109+
call void @pure()
110+
call void @llvm.aarch64.dmb(i32 10)
111+
ret void
112+
}
113+
114+
; Both branch arms contain a dmb and fall through to %exit, which starts with
; an identical dmb: the expected output keeps only the dmb in %exit.
define void @multiple_bbs1(i1 %f) #0 {
115+
; CHECK-LABEL: define void @multiple_bbs1(
116+
; CHECK-SAME: i1 [[F:%.*]]) {
117+
; CHECK-NEXT: [[ENTRY:.*:]]
118+
; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
119+
; CHECK: [[BB_T]]:
120+
; CHECK-NEXT: br label %[[EXIT:.*]]
121+
; CHECK: [[BB_F]]:
122+
; CHECK-NEXT: br label %[[EXIT]]
123+
; CHECK: [[EXIT]]:
124+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
125+
; CHECK-NEXT: ret void
126+
;
127+
entry:
128+
br i1 %f, label %bb_t, label %bb_f
129+
bb_t:
130+
call void @llvm.aarch64.dmb(i32 10)
131+
br label %exit
132+
bb_f:
133+
call void @llvm.aarch64.dmb(i32 10)
134+
br label %exit
135+
exit:
136+
call void @llvm.aarch64.dmb(i32 10)
137+
ret void
138+
}
139+
140+
; Only %bb_t contains a dmb; %bb_f is empty apart from its branch. Both reach
; %exit, which starts with an identical dmb: the expected output keeps only
; the dmb in %exit.
define void @multiple_bbs2(i1 %f) #0 {
141+
; CHECK-LABEL: define void @multiple_bbs2(
142+
; CHECK-SAME: i1 [[F:%.*]]) {
143+
; CHECK-NEXT: [[ENTRY:.*:]]
144+
; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
145+
; CHECK: [[BB_T]]:
146+
; CHECK-NEXT: br label %[[EXIT:.*]]
147+
; CHECK: [[BB_F]]:
148+
; CHECK-NEXT: br label %[[EXIT]]
149+
; CHECK: [[EXIT]]:
150+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
151+
; CHECK-NEXT: ret void
152+
;
153+
entry:
154+
br i1 %f, label %bb_t, label %bb_f
155+
bb_t:
156+
call void @llvm.aarch64.dmb(i32 10)
157+
br label %exit
158+
bb_f:
159+
br label %exit
160+
exit:
161+
call void @llvm.aarch64.dmb(i32 10)
162+
ret void
163+
}
164+
165+
; %bb_t contains a dmb, %bb_f contains a store, and %exit starts with an
; identical dmb: the expected output drops the dmb in %bb_t and keeps the
; store in %bb_f and the dmb in %exit.
define void @multiple_bbs3(i1 %f, ptr %p) #0 {
166+
; CHECK-LABEL: define void @multiple_bbs3(
167+
; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) {
168+
; CHECK-NEXT: [[ENTRY:.*:]]
169+
; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
170+
; CHECK: [[BB_T]]:
171+
; CHECK-NEXT: br label %[[EXIT:.*]]
172+
; CHECK: [[BB_F]]:
173+
; CHECK-NEXT: store i32 42, ptr [[P]], align 4
174+
; CHECK-NEXT: br label %[[EXIT]]
175+
; CHECK: [[EXIT]]:
176+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
177+
; CHECK-NEXT: ret void
178+
;
179+
entry:
180+
br i1 %f, label %bb_t, label %bb_f
181+
bb_t:
182+
call void @llvm.aarch64.dmb(i32 10)
183+
br label %exit
184+
bb_f:
185+
store i32 42, ptr %p
186+
br label %exit
187+
exit:
188+
call void @llvm.aarch64.dmb(i32 10)
189+
ret void
190+
}
191+
192+
; %bb_t contains a dmb followed by a store, %bb_f contains only a dmb, and
; %exit starts with an identical dmb: the expected output keeps the dmb in
; %bb_t (a store follows it), drops the dmb in %bb_f, and keeps the one in
; %exit.
define void @multiple_bbs_unsafe(i1 %f, ptr %p) #0 {
193+
; CHECK-LABEL: define void @multiple_bbs_unsafe(
194+
; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) {
195+
; CHECK-NEXT: [[ENTRY:.*:]]
196+
; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
197+
; CHECK: [[BB_T]]:
198+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
199+
; CHECK-NEXT: store i32 42, ptr [[P]], align 4
200+
; CHECK-NEXT: br label %[[EXIT:.*]]
201+
; CHECK: [[BB_F]]:
202+
; CHECK-NEXT: br label %[[EXIT]]
203+
; CHECK: [[EXIT]]:
204+
; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10)
205+
; CHECK-NEXT: ret void
206+
;
207+
entry:
208+
br i1 %f, label %bb_t, label %bb_f
209+
bb_t:
210+
call void @llvm.aarch64.dmb(i32 10)
211+
store i32 42, ptr %p
212+
br label %exit
213+
bb_f:
214+
call void @llvm.aarch64.dmb(i32 10)
215+
br label %exit
216+
exit:
217+
call void @llvm.aarch64.dmb(i32 10)
218+
ret void
219+
}
220+

0 commit comments

Comments
 (0)