Skip to content

Commit 62fd332

Browse files
nimit25Nimit Sachdeva
andauthored
[InstCombine] Optimize usub.sat pattern (#151044)
Fixes #79690. Generalized proof: https://alive2.llvm.org/ce/z/22ybrr --------- Co-authored-by: Nimit Sachdeva <[email protected]>
1 parent 6c3db64 commit 62fd332

File tree

2 files changed

+375
-0
lines changed

2 files changed

+375
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1993,6 +1993,63 @@ Value *InstCombinerImpl::foldSelectWithConstOpToBinOp(ICmpInst *Cmp,
19931993
return BinOp;
19941994
}
19951995

1996+
/// Folds:
1997+
/// %a_sub = call @llvm.usub.sat(x, IntConst1)
1998+
/// %b_sub = call @llvm.usub.sat(y, IntConst2)
1999+
/// %or = or %a_sub, %b_sub
2000+
/// %cmp = icmp eq %or, 0
2001+
/// %sel = select %cmp, 0, MostSignificantBit
2002+
/// into:
2003+
/// %a_sub' = usub.sat(x, IntConst1 - MostSignificantBit)
2004+
/// %b_sub' = usub.sat(y, IntConst2 - MostSignificantBit)
2005+
/// %or = or %a_sub', %b_sub'
2006+
/// %and = and %or, MostSignificantBit
2007+
/// Likewise, for vector arguments as well.
2008+
static Instruction *foldICmpUSubSatWithAndForMostSignificantBitCmp(
2009+
SelectInst &SI, ICmpInst *ICI, InstCombiner::BuilderTy &Builder) {
2010+
if (!SI.hasOneUse() || !ICI->hasOneUse())
2011+
return nullptr;
2012+
CmpPredicate Pred;
2013+
Value *A, *B;
2014+
const APInt *Constant1, *Constant2;
2015+
if (!match(SI.getCondition(),
2016+
m_ICmp(Pred,
2017+
m_OneUse(m_Or(m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(
2018+
m_Value(A), m_APInt(Constant1))),
2019+
m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(
2020+
m_Value(B), m_APInt(Constant2))))),
2021+
m_Zero())))
2022+
return nullptr;
2023+
2024+
Value *TrueVal = SI.getTrueValue();
2025+
Value *FalseVal = SI.getFalseValue();
2026+
if (!(Pred == ICmpInst::ICMP_EQ &&
2027+
(match(TrueVal, m_Zero()) && match(FalseVal, m_SignMask()))) ||
2028+
(Pred == ICmpInst::ICMP_NE &&
2029+
(match(TrueVal, m_SignMask()) && match(FalseVal, m_Zero()))))
2030+
return nullptr;
2031+
2032+
auto *Ty = A->getType();
2033+
unsigned BW = Constant1->getBitWidth();
2034+
APInt MostSignificantBit = APInt::getSignMask(BW);
2035+
2036+
// Anything over MSB is negative
2037+
if (Constant1->isNonNegative() || Constant2->isNonNegative())
2038+
return nullptr;
2039+
2040+
APInt AdjAP1 = *Constant1 - MostSignificantBit + 1;
2041+
APInt AdjAP2 = *Constant2 - MostSignificantBit + 1;
2042+
2043+
auto *Adj1 = ConstantInt::get(Ty, AdjAP1);
2044+
auto *Adj2 = ConstantInt::get(Ty, AdjAP2);
2045+
2046+
Value *NewA = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, Adj1);
2047+
Value *NewB = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, B, Adj2);
2048+
Value *Or = Builder.CreateOr(NewA, NewB);
2049+
Constant *MSBConst = ConstantInt::get(Ty, MostSignificantBit);
2050+
return BinaryOperator::CreateAnd(Or, MSBConst);
2051+
}
2052+
19962053
/// Visit a SelectInst that has an ICmpInst as its first operand.
19972054
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
19982055
ICmpInst *ICI) {
@@ -2009,6 +2066,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
20092066
if (Instruction *NewSel =
20102067
tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
20112068
return NewSel;
2069+
if (Instruction *Folded =
2070+
foldICmpUSubSatWithAndForMostSignificantBitCmp(SI, ICI, Builder))
2071+
return Folded;
20122072

20132073
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
20142074
bool Changed = false;
Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: opt -passes=instcombine -S < %s 2>&1 | FileCheck %s
4+
5+
define i8 @test_i8(i8 %a, i8 %b) {
6+
; CHECK-LABEL: define i8 @test_i8(
7+
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
8+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96)
9+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112)
10+
; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
11+
; CHECK-NEXT: [[RES:%.*]] = and i8 [[TMP3]], -128
12+
; CHECK-NEXT: ret i8 [[RES]]
13+
;
14+
15+
%a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
16+
%b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
17+
%or = or i8 %a_sub, %b_sub
18+
%cmp = icmp eq i8 %or, 0
19+
%res = select i1 %cmp, i8 0, i8 128
20+
ret i8 %res
21+
}
22+
23+
define i8 @test_i8_ne(i8 %a, i8 %b) {
24+
; CHECK-LABEL: define i8 @test_i8_ne(
25+
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
26+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96)
27+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112)
28+
; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
29+
; CHECK-NEXT: [[RES:%.*]] = and i8 [[TMP3]], -128
30+
; CHECK-NEXT: ret i8 [[RES]]
31+
;
32+
33+
%a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
34+
%b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
35+
%or = or i8 %a_sub, %b_sub
36+
%cmp = icmp ne i8 %or, 0
37+
%res = select i1 %cmp, i8 128, i8 0
38+
ret i8 %res
39+
}
40+
41+
define i16 @test_i16(i16 %a, i16 %b) {
42+
; CHECK-LABEL: define i16 @test_i16(
43+
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
44+
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[A]], i16 32642)
45+
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[B]], i16 32656)
46+
; CHECK-NEXT: [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP2]]
47+
; CHECK-NEXT: [[RES:%.*]] = and i16 [[TMP3]], -32768
48+
; CHECK-NEXT: ret i16 [[RES]]
49+
;
50+
51+
%a_sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 65409)
52+
%b_sub = call i16 @llvm.usub.sat.i16(i16 %b, i16 65423)
53+
%or = or i16 %a_sub, %b_sub
54+
%cmp = icmp eq i16 %or, 0
55+
%res = select i1 %cmp, i16 0, i16 32768
56+
ret i16 %res
57+
}
58+
59+
define i32 @test_i32(i32 %a, i32 %b) {
60+
; CHECK-LABEL: define i32 @test_i32(
61+
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
62+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 224)
63+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 240)
64+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
65+
; CHECK-NEXT: [[RES:%.*]] = and i32 [[TMP3]], -2147483648
66+
; CHECK-NEXT: ret i32 [[RES]]
67+
;
68+
69+
%a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 2147483871)
70+
%b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 2147483887)
71+
%or = or i32 %a_sub, %b_sub
72+
%cmp = icmp eq i32 %or, 0
73+
%res = select i1 %cmp, i32 0, i32 2147483648
74+
ret i32 %res
75+
}
76+
77+
define i64 @test_i64(i64 %a, i64 %b) {
78+
; CHECK-LABEL: define i64 @test_i64(
79+
; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
80+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 224)
81+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[B]], i64 240)
82+
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
83+
; CHECK-NEXT: [[RES:%.*]] = and i64 [[TMP3]], -9223372036854775808
84+
; CHECK-NEXT: ret i64 [[RES]]
85+
;
86+
87+
%a_sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 9223372036854776031)
88+
%b_sub = call i64 @llvm.usub.sat.i64(i64 %b, i64 9223372036854776047)
89+
%or = or i64 %a_sub, %b_sub
90+
%cmp = icmp eq i64 %or, 0
91+
%res = select i1 %cmp, i64 0, i64 9223372036854775808
92+
ret i64 %res
93+
}
94+
95+
define i32 @no_fold_due_to_small_K(i32 %a, i32 %b) {
96+
; CHECK-LABEL: define i32 @no_fold_due_to_small_K(
97+
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
98+
; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 100)
99+
; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239)
100+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]]
101+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
102+
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
103+
; CHECK-NEXT: ret i32 [[RES]]
104+
;
105+
106+
%a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 100)
107+
%b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239)
108+
%or = or i32 %a_sub, %b_sub
109+
%cmp = icmp eq i32 %or, 0
110+
%res = select i1 %cmp, i32 0, i32 2147483648
111+
ret i32 %res
112+
}
113+
114+
define i32 @commuted_test_neg(i32 %a, i32 %b) {
115+
; CHECK-LABEL: define i32 @commuted_test_neg(
116+
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
117+
; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239)
118+
; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 223)
119+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[B_SUB]], [[A_SUB]]
120+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
121+
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
122+
; CHECK-NEXT: ret i32 [[RES]]
123+
;
124+
125+
%b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239)
126+
%a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 223)
127+
%or = or i32 %b_sub, %a_sub
128+
%cmp = icmp eq i32 %or, 0
129+
%res = select i1 %cmp, i32 0, i32 2147483648
130+
ret i32 %res
131+
}
132+
define <4 x i32> @vector_test(<4 x i32> %a, <4 x i32> %b) {
133+
; CHECK-LABEL: define <4 x i32> @vector_test(
134+
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
135+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224))
136+
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240))
137+
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
138+
; CHECK-NEXT: [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648)
139+
; CHECK-NEXT: ret <4 x i32> [[RES]]
140+
;
141+
142+
143+
%a_sub = call <4 x i32> @llvm.usub.sat.v4i32(
144+
<4 x i32> %a, <4 x i32> splat (i32 2147483871))
145+
%b_sub = call <4 x i32> @llvm.usub.sat.v4i32(
146+
<4 x i32> %b, <4 x i32> splat (i32 2147483887))
147+
%or = or <4 x i32> %a_sub, %b_sub
148+
%cmp = icmp eq <4 x i32> %or, zeroinitializer
149+
%res = select <4 x i1> %cmp,
150+
<4 x i32> zeroinitializer,
151+
<4 x i32> splat (i32 -2147483648)
152+
ret <4 x i32> %res
153+
}
154+
155+
define <4 x i32> @vector_negative_test(<4 x i32> %a, <4 x i32> %b) {
156+
; CHECK-LABEL: define <4 x i32> @vector_negative_test(
157+
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
158+
; CHECK-NEXT: [[A_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> <i32 -2147483425, i32 0, i32 -2147483425, i32 -2147483425>)
159+
; CHECK-NEXT: [[B_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 -2147483409))
160+
; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[A_SUB]], [[B_SUB]]
161+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[OR]], zeroinitializer
162+
; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 -2147483648)
163+
; CHECK-NEXT: ret <4 x i32> [[RES]]
164+
;
165+
%a_sub = call <4 x i32> @llvm.usub.sat.v4i32(
166+
<4 x i32> %a,
167+
<4 x i32> <i32 2147483871, i32 0, i32 2147483871, i32 2147483871>)
168+
%b_sub = call <4 x i32> @llvm.usub.sat.v4i32(
169+
<4 x i32> %b,
170+
<4 x i32> <i32 2147483887, i32 2147483887, i32 2147483887, i32 2147483887>)
171+
%or = or <4 x i32> %a_sub, %b_sub
172+
%cmp = icmp eq <4 x i32> %or, zeroinitializer
173+
%res = select <4 x i1> %cmp, <4 x i32> zeroinitializer,
174+
<4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
175+
ret <4 x i32> %res
176+
}
177+
178+
; Vector variant of @test_i8_ne: inverted predicate (icmp ne) with the select
; arms swapped. Expected output is the same as for @vector_test.
define <4 x i32> @vector_ne_test(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: define <4 x i32> @vector_ne_test(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224))
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240))
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;

  %a_sub = call <4 x i32> @llvm.usub.sat.v4i32(
  <4 x i32> %a, <4 x i32> splat (i32 2147483871))
  %b_sub = call <4 x i32> @llvm.usub.sat.v4i32(
  <4 x i32> %b, <4 x i32> splat (i32 2147483887))
  %or = or <4 x i32> %a_sub, %b_sub
  %cmp = icmp ne <4 x i32> %or, zeroinitializer
  %res = select <4 x i1> %cmp,
  <4 x i32> splat (i32 -2147483648),
  <4 x i32> zeroinitializer
  ret <4 x i32> %res
}
200+
201+
declare i1 @id_i1(i1)
202+
203+
204+
; Negative test: the icmp has a second user (@id_i1), so the fold must not
; fire. The usub.sat constants have their sign bit set so that the extra use
; is the only thing preventing the transform.
define i1 @multi_use_icmp(i32 %a, i32 %b) {
; CHECK-LABEL: define i1 @multi_use_icmp(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 -2147483425)
; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 -2147483409)
; CHECK-NEXT: [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
; CHECK-NEXT: [[CMP_OPAQUE:%.*]] = call i1 @id_i1(i1 [[CMP]])
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
; CHECK-NEXT: [[EXTRA:%.*]] = xor i1 [[CMP_OPAQUE]], true
; CHECK-NEXT: [[SEL_OPAQUE:%.*]] = call i32 @id_i32(i32 [[SEL]])
; CHECK-NEXT: [[SEL_NZ:%.*]] = icmp ne i32 [[SEL_OPAQUE]], 0
; CHECK-NEXT: [[R:%.*]] = and i1 [[SEL_NZ]], [[EXTRA]]
; CHECK-NEXT: ret i1 [[R]]
;
  %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 -2147483425)
  %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 -2147483409)
  %or = or i32 %a_sub, %b_sub
  %cmp = icmp eq i32 %or, 0
  %cmp_opaque = call i1 @id_i1(i1 %cmp)
  %sel = select i1 %cmp, i32 0, i32 -2147483648
  %extra = xor i1 %cmp_opaque, true
  %sel_opaque = call i32 @id_i32(i32 %sel)
  %sel_is_nonzero = icmp ne i32 %sel_opaque, 0
  %r = and i1 %extra, %sel_is_nonzero
  ret i1 %r
}
231+
232+
233+
declare i32 @id_i32(i32)
234+
235+
define i32 @multi_use_select(i32 %a, i32 %b) {
236+
; CHECK-LABEL: define i32 @multi_use_select(
237+
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
238+
; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 224)
239+
; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 240)
240+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]]
241+
; CHECK-NEXT: [[SEL:%.*]] = and i32 [[OR]], -2147483648
242+
; CHECK-NEXT: [[SEL_OPAQUE:%.*]] = call i32 @id_i32(i32 [[SEL]])
243+
; CHECK-NEXT: ret i32 [[SEL_OPAQUE]]
244+
;
245+
%a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 -2147483425)
246+
%b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 -2147483409)
247+
%or = or i32 %a_sub, %b_sub
248+
%cmp = icmp eq i32 %or, 0
249+
%sel = select i1 %cmp, i32 0, i32 -2147483648
250+
%sel_opaque = call i32 @id_i32(i32 %sel)
251+
ret i32 %sel_opaque
252+
}
253+
254+
255+
define i8 @no_fold_usub_extra_use(i8 %a, i8 %b) {
256+
; CHECK-LABEL: define i8 @no_fold_usub_extra_use(
257+
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
258+
; CHECK-NEXT: [[A_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 -33)
259+
; CHECK-NEXT: [[B_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 -17)
260+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[A_SUB]], [[B_SUB]]
261+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 0
262+
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i8 0, i8 -128
263+
; CHECK-NEXT: call void @use(i8 [[A_SUB]])
264+
; CHECK-NEXT: ret i8 [[RES]]
265+
;
266+
%a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
267+
%b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
268+
%or = or i8 %a_sub, %b_sub
269+
%cmp = icmp eq i8 %or, 0
270+
%res = select i1 %cmp, i8 0, i8 128
271+
call void @use(i8 %a_sub)
272+
ret i8 %res
273+
}
274+
275+
define i8 @no_fold_or_extra_use(i8 %a, i8 %b) {
276+
; CHECK-LABEL: define i8 @no_fold_or_extra_use(
277+
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
278+
; CHECK-NEXT: [[A_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 -33)
279+
; CHECK-NEXT: [[B_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 -17)
280+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[A_SUB]], [[B_SUB]]
281+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 0
282+
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i8 0, i8 -128
283+
; CHECK-NEXT: call void @use(i8 [[OR]])
284+
; CHECK-NEXT: ret i8 [[RES]]
285+
;
286+
%a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
287+
%b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
288+
%or = or i8 %a_sub, %b_sub
289+
%cmp = icmp eq i8 %or, 0
290+
%res = select i1 %cmp, i8 0, i8 128
291+
call void @use(i8 %or)
292+
ret i8 %res
293+
}
294+
295+
define i8 @no_fold_usub_b_extra_use(i8 %a, i8 %b) {
296+
; CHECK-LABEL: define i8 @no_fold_usub_b_extra_use(
297+
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
298+
; CHECK-NEXT: [[A_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 -33)
299+
; CHECK-NEXT: [[B_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 -17)
300+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[A_SUB]], [[B_SUB]]
301+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 0
302+
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i8 0, i8 -128
303+
; CHECK-NEXT: call void @use(i8 [[B_SUB]])
304+
; CHECK-NEXT: ret i8 [[RES]]
305+
;
306+
%a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
307+
%b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
308+
%or = or i8 %a_sub, %b_sub
309+
%cmp = icmp eq i8 %or, 0
310+
%res = select i1 %cmp, i8 0, i8 128
311+
call void @use(i8 %b_sub)
312+
ret i8 %res
313+
}
314+
315+
declare void @use(i8)

0 commit comments

Comments
 (0)