Skip to content

Commit eab6eba

Browse files
committed
[InstCombine] Fold icmp with clamp to unsigned bound check
1 parent cffbd01 commit eab6eba

File tree

3 files changed

+67
-34
lines changed

3 files changed

+67
-34
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5780,6 +5780,47 @@ Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I,
57805780
return nullptr;
57815781
}
57825782

5783+
// Transform patterns like:
5784+
// icmp eq/ne X, min(max(X, Lo), Hi)
5785+
// Into:
5786+
// (X - Lo) u< (Hi - Lo + 1)
5787+
Instruction *InstCombinerImpl::foldICmpWithClamp(ICmpInst &I, Value *X,
5788+
MinMaxIntrinsic *Min) {
5789+
if (!I.isEquality() || !Min->hasOneUse())
5790+
return nullptr;
5791+
5792+
const APInt *Lo = nullptr, *Hi = nullptr;
5793+
if (Min->isSigned()) {
5794+
if (!match(Min->getLHS(), m_OneUse(m_SMax(m_Specific(X), m_APInt(Lo)))) ||
5795+
!match(Min->getRHS(), m_APInt(Hi)) || !Lo->slt(*Hi))
5796+
return nullptr;
5797+
} else {
5798+
if (!match(Min->getLHS(), m_OneUse(m_UMax(m_Specific(X), m_APInt(Lo)))) ||
5799+
!match(Min->getRHS(), m_APInt(Hi)) || !Lo->ult(*Hi))
5800+
return nullptr;
5801+
}
5802+
5803+
// If Hi is the maximum value, the min operation becomes redundant and
5804+
// will be removed by other optimizations.
5805+
if ((Min->isSigned() && (Lo->isMinSignedValue() || Hi->isMaxSignedValue())) ||
5806+
(!Min->isSigned() && (Lo->isMinValue() || Hi->isMaxValue())))
5807+
return nullptr;
5808+
5809+
ConstantRange CR(*Lo, *Hi + 1);
5810+
ICmpInst::Predicate Pred;
5811+
APInt C, Offset;
5812+
if (I.getPredicate() == ICmpInst::ICMP_EQ)
5813+
CR.getEquivalentICmp(Pred, C, Offset);
5814+
else
5815+
CR.inverse().getEquivalentICmp(Pred, C, Offset);
5816+
5817+
if (Offset != 0)
5818+
X = Builder.CreateAdd(X, ConstantInt::get(X->getType(), Offset));
5819+
5820+
return replaceInstUsesWith(
5821+
I, Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), C)));
5822+
}
5823+
57835824
// Canonicalize checking for a power-of-2-or-zero value:
57845825
static Instruction *foldICmpPow2Test(ICmpInst &I,
57855826
InstCombiner::BuilderTy &Builder) {
@@ -7467,10 +7508,14 @@ Instruction *InstCombinerImpl::foldICmpCommutative(CmpPredicate Pred,
74677508
if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI))
74687509
return NI;
74697510

7470-
if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0))
7511+
if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0)) {
74717512
if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred))
74727513
return Res;
74737514

7515+
if (Instruction *Res = foldICmpWithClamp(CxtI, Op1, MinMax))
7516+
return Res;
7517+
}
7518+
74747519
{
74757520
Value *X;
74767521
const APInt *C;

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
725725
Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
726726
Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax,
727727
Value *Z, CmpPredicate Pred);
728+
Instruction *foldICmpWithClamp(ICmpInst &Cmp, Value *X, MinMaxIntrinsic *Min);
728729
Instruction *foldICmpEquality(ICmpInst &Cmp);
729730
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
730731
Instruction *foldSignBitTest(ICmpInst &I);

llvm/test/Transforms/InstCombine/icmp-clamp.ll

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ declare void @use(i32)
66
define i1 @test_i32_eq(i32 %x) {
77
; CHECK-LABEL: define i1 @test_i32_eq(
88
; CHECK-SAME: i32 [[X:%.*]]) {
9-
; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 -95)
10-
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
11-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2]], [[X]]
9+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], 95
10+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 256
1211
; CHECK-NEXT: ret i1 [[CMP]]
1312
;
1413
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
@@ -20,9 +19,8 @@ define i1 @test_i32_eq(i32 %x) {
2019
define i1 @test_i32_ne(i32 %x) {
2120
; CHECK-LABEL: define i1 @test_i32_ne(
2221
; CHECK-SAME: i32 [[X:%.*]]) {
23-
; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 -95)
24-
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
25-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[V2]], [[X]]
22+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -161
23+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], -256
2624
; CHECK-NEXT: ret i1 [[CMP]]
2725
;
2826
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
@@ -34,9 +32,7 @@ define i1 @test_i32_ne(i32 %x) {
3432
define i1 @test_i32_eq_no_add(i32 %x) {
3533
; CHECK-LABEL: define i1 @test_i32_eq_no_add(
3634
; CHECK-SAME: i32 [[X:%.*]]) {
37-
; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 0)
38-
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
39-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2]], [[X]]
35+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 161
4036
; CHECK-NEXT: ret i1 [[CMP]]
4137
;
4238
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -48,9 +44,7 @@ define i1 @test_i32_eq_no_add(i32 %x) {
4844
define i1 @test_i32_ne_no_add(i32 %x) {
4945
; CHECK-LABEL: define i1 @test_i32_ne_no_add(
5046
; CHECK-SAME: i32 [[X:%.*]]) {
51-
; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 0)
52-
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
53-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[V2]], [[X]]
47+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X]], 160
5448
; CHECK-NEXT: ret i1 [[CMP]]
5549
;
5650
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -62,9 +56,8 @@ define i1 @test_i32_ne_no_add(i32 %x) {
6256
define i1 @test_unsigned_eq(i32 %x) {
6357
; CHECK-LABEL: define i1 @test_unsigned_eq(
6458
; CHECK-SAME: i32 [[X:%.*]]) {
65-
; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.umax.i32(i32 [[X]], i32 10)
66-
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.umin.i32(i32 [[V1]], i32 100)
67-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2]], [[X]]
59+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -10
60+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 91
6861
; CHECK-NEXT: ret i1 [[CMP]]
6962
;
7063
%v1 = tail call i32 @llvm.umax.i32(i32 %x, i32 10)
@@ -76,9 +69,8 @@ define i1 @test_unsigned_eq(i32 %x) {
7669
define i1 @test_unsigned_ne(i32 %x) {
7770
; CHECK-LABEL: define i1 @test_unsigned_ne(
7871
; CHECK-SAME: i32 [[X:%.*]]) {
79-
; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.umax.i32(i32 [[X]], i32 10)
80-
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.umin.i32(i32 [[V1]], i32 100)
81-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[V2]], [[X]]
72+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -101
73+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], -91
8274
; CHECK-NEXT: ret i1 [[CMP]]
8375
;
8476
%v1 = tail call i32 @llvm.umax.i32(i32 %x, i32 10)
@@ -92,9 +84,8 @@ define i1 @test_unsigned_ne(i32 %x) {
9284
define i1 @test_i8_eq(i8 %x) {
9385
; CHECK-LABEL: define i1 @test_i8_eq(
9486
; CHECK-SAME: i8 [[X:%.*]]) {
95-
; CHECK-NEXT: [[V1:%.*]] = tail call i8 @llvm.smax.i8(i8 [[X]], i8 -50)
96-
; CHECK-NEXT: [[V2:%.*]] = tail call i8 @llvm.smin.i8(i8 [[V1]], i8 50)
97-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[V2]], [[X]]
87+
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 50
88+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 101
9889
; CHECK-NEXT: ret i1 [[CMP]]
9990
;
10091
%v1 = tail call i8 @llvm.smax.i8(i8 %x, i8 -50)
@@ -106,9 +97,8 @@ define i1 @test_i8_eq(i8 %x) {
10697
define i1 @test_i16_eq(i16 %x) {
10798
; CHECK-LABEL: define i1 @test_i16_eq(
10899
; CHECK-SAME: i16 [[X:%.*]]) {
109-
; CHECK-NEXT: [[V1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -1000)
110-
; CHECK-NEXT: [[V2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[V1]], i16 1000)
111-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[V2]], [[X]]
100+
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[X]], 1000
101+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[TMP1]], 2001
112102
; CHECK-NEXT: ret i1 [[CMP]]
113103
;
114104
%v1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -1000)
@@ -120,9 +110,8 @@ define i1 @test_i16_eq(i16 %x) {
120110
define i1 @test_i64_eq(i64 %x) {
121111
; CHECK-LABEL: define i1 @test_i64_eq(
122112
; CHECK-SAME: i64 [[X:%.*]]) {
123-
; CHECK-NEXT: [[V1:%.*]] = tail call i64 @llvm.smax.i64(i64 [[X]], i64 -1)
124-
; CHECK-NEXT: [[V2:%.*]] = tail call i64 @llvm.smin.i64(i64 [[V1]], i64 9223372036854775806)
125-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[V2]], [[X]]
113+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X]], 1
114+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP1]], -1
126115
; CHECK-NEXT: ret i1 [[CMP]]
127116
;
128117
%v1 = tail call i64 @llvm.smax.i64(i64 %x, i64 -1)
@@ -250,9 +239,8 @@ define i1 @test_multi_use_min(i32 %x) {
250239
define i1 @test_commuted_eq(i32 %x) {
251240
; CHECK-LABEL: define i1 @test_commuted_eq(
252241
; CHECK-SAME: i32 [[X:%.*]]) {
253-
; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 -95)
254-
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
255-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[V2]]
242+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], 95
243+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 256
256244
; CHECK-NEXT: ret i1 [[CMP]]
257245
;
258246
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
@@ -266,9 +254,8 @@ define i1 @test_commuted_eq(i32 %x) {
266254
define <2 x i1> @test_vec_splat_eq(<2 x i32> %x) {
267255
; CHECK-LABEL: define <2 x i1> @test_vec_splat_eq(
268256
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
269-
; CHECK-NEXT: [[V1:%.*]] = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X]], <2 x i32> splat (i32 -50))
270-
; CHECK-NEXT: [[V2:%.*]] = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[V1]], <2 x i32> splat (i32 50))
271-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V2]], [[X]]
257+
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X]], splat (i32 50)
258+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 101)
272259
; CHECK-NEXT: ret <2 x i1> [[CMP]]
273260
;
274261
%v1 = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %x, <2 x i32> <i32 -50, i32 -50>)

0 commit comments

Comments
 (0)