Skip to content

Commit ede49fa

Browse files
AZero13 authored and topperc committed
[InstCombine] Canonicalize more saturated-add variants (#100008)
LLVM does not evaluate `X u> C ? a : b` the same way it evaluates `X u<= C ? b : a`. To fix this, move the folds to after the canonicalization of -1 to TrueVal. Also allow splat vectors with poison elements to be recognized. Finally, for completeness, handle the one case that isn't caught by the above checks because it is canonicalized to eq: X == -1 ? -1 : X + 1 -> uadd.sat(X, 1). Alive2 Proof: https://alive2.llvm.org/ce/z/WEcgYH
1 parent da140bc commit ede49fa

File tree

2 files changed

+49
-32
lines changed

2 files changed

+49
-32
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -974,14 +974,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
974974
Value *Cmp1 = Cmp->getOperand(1);
975975
ICmpInst::Predicate Pred = Cmp->getPredicate();
976976
Value *X;
977-
const APInt *C, *CmpC;
978-
if (Pred == ICmpInst::ICMP_ULT &&
979-
match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
980-
match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
981-
// (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
982-
return Builder.CreateBinaryIntrinsic(
983-
Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
984-
}
977+
const APInt *C;
985978

986979
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
987980
// There are 8 commuted variants.
@@ -993,6 +986,46 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
993986
if (!match(TVal, m_AllOnes()))
994987
return nullptr;
995988

989+
// uge -1 is canonicalized to eq -1 and requires special handling
990+
// (a == -1) ? -1 : a + 1 -> uadd.sat(a, 1)
991+
if (Pred == ICmpInst::ICMP_EQ) {
992+
if (match(FVal, m_Add(m_Specific(Cmp0), m_One())) &&
993+
match(Cmp1, m_AllOnes())) {
994+
return Builder.CreateBinaryIntrinsic(
995+
Intrinsic::uadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), 1));
996+
}
997+
return nullptr;
998+
}
999+
1000+
if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) &&
1001+
match(FVal, m_Add(m_Specific(Cmp0), m_APIntAllowPoison(C))) &&
1002+
match(Cmp1, m_SpecificIntAllowPoison(~*C))) {
1003+
// (X u> ~C) ? -1 : (X + C) --> uadd.sat(X, C)
1004+
// (X u>= ~C) ? -1 : (X + C) --> uadd.sat(X, C)
1005+
return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
1006+
ConstantInt::get(Cmp0->getType(), *C));
1007+
}
1008+
1009+
// Negative one does not work here because X u> -1 ? -1 : X + -1 is not a
1010+
// saturated add.
1011+
if (Pred == ICmpInst::ICMP_UGT &&
1012+
match(FVal, m_Add(m_Specific(Cmp0), m_APIntAllowPoison(C))) &&
1013+
match(Cmp1, m_SpecificIntAllowPoison(~*C - 1)) && !C->isAllOnes()) {
1014+
// (X u> ~C - 1) ? -1 : (X + C) --> uadd.sat(X, C)
1015+
return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
1016+
ConstantInt::get(Cmp0->getType(), *C));
1017+
}
1018+
1019+
// Zero does not work here because X u>= 0 ? -1 : X is always -1, which is
1020+
// not a saturated add.
1021+
if (Pred == ICmpInst::ICMP_UGE &&
1022+
match(FVal, m_Add(m_Specific(Cmp0), m_APIntAllowPoison(C))) &&
1023+
match(Cmp1, m_SpecificIntAllowPoison(-*C)) && !C->isZero()) {
1024+
// (X u>= -C) ? -1 : (X + C) --> uadd.sat(X, C)
1025+
return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
1026+
ConstantInt::get(Cmp0->getType(), *C));
1027+
}
1028+
9961029
// Canonicalize predicate to less-than or less-or-equal-than.
9971030
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
9981031
std::swap(Cmp0, Cmp1);

llvm/test/Transforms/InstCombine/saturating-add-sub.ll

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,9 +1398,7 @@ define i32 @uadd_sat(i32 %x, i32 %y) {
13981398

13991399
define i32 @uadd_sat_flipped(i32 %x) {
14001400
; CHECK-LABEL: @uadd_sat_flipped(
1401-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -11
1402-
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
1403-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
1401+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
14041402
; CHECK-NEXT: ret i32 [[COND]]
14051403
;
14061404
%cmp = icmp ugt i32 %x, -11
@@ -1411,9 +1409,7 @@ define i32 @uadd_sat_flipped(i32 %x) {
14111409

14121410
define i32 @uadd_sat_flipped2(i32 %x) {
14131411
; CHECK-LABEL: @uadd_sat_flipped2(
1414-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -10
1415-
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
1416-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
1412+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
14171413
; CHECK-NEXT: ret i32 [[COND]]
14181414
;
14191415
%cmp = icmp ugt i32 %x, -10
@@ -1452,9 +1448,7 @@ define i32 @uadd_sat_flipped3_neg_no_nuw(i32 %x) {
14521448

14531449
define i32 @uadd_sat_negative_one(i32 %x) {
14541450
; CHECK-LABEL: @uadd_sat_negative_one(
1455-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], -1
1456-
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 1
1457-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
1451+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 1)
14581452
; CHECK-NEXT: ret i32 [[COND]]
14591453
;
14601454
%cmp = icmp eq i32 %x, -1
@@ -1476,9 +1470,7 @@ define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) {
14761470

14771471
define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) {
14781472
; CHECK-LABEL: @uadd_sat_flipped4_poison_vector(
1479-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 -10, i8 poison>
1480-
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], <i8 9, i8 9>
1481-
; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> <i8 -1, i8 -1>
1473+
; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 9, i8 9>)
14821474
; CHECK-NEXT: ret <2 x i8> [[COND]]
14831475
;
14841476
%cmp = icmp ult <2 x i8> %x, <i8 -10, i8 poison>
@@ -1489,9 +1481,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) {
14891481

14901482
define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) {
14911483
; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare(
1492-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 -10, i8 poison>
1493-
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], <i8 9, i8 poison>
1494-
; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> <i8 -1, i8 -1>
1484+
; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 9, i8 9>)
14951485
; CHECK-NEXT: ret <2 x i8> [[COND]]
14961486
;
14971487
%cmp = icmp ult <2 x i8> %x, <i8 -10, i8 poison>
@@ -1986,9 +1976,7 @@ define i32 @uadd_sat_not_commute_select_uge_commute_add(i32 %x, i32 %y) {
19861976

19871977
define i32 @uadd_sat_constant(i32 %x) {
19881978
; CHECK-LABEL: @uadd_sat_constant(
1989-
; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 42
1990-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], -43
1991-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
1979+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42)
19921980
; CHECK-NEXT: ret i32 [[R]]
19931981
;
19941982
%a = add i32 %x, 42
@@ -2054,9 +2042,7 @@ define i32 @uadd_sat_canon_y_nuw(i32 %x, i32 %y) {
20542042

20552043
define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) {
20562044
; CHECK-LABEL: @uadd_sat_constant_vec(
2057-
; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
2058-
; CHECK-NEXT: [[C:%.*]] = icmp ugt <4 x i32> [[X]], <i32 -43, i32 -43, i32 -43, i32 -43>
2059-
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[A]]
2045+
; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
20602046
; CHECK-NEXT: ret <4 x i32> [[R]]
20612047
;
20622048
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -2078,9 +2064,7 @@ define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) {
20782064

20792065
define <4 x i32> @uadd_sat_constant_vec_commute_undefs(<4 x i32> %x) {
20802066
; CHECK-LABEL: @uadd_sat_constant_vec_commute_undefs(
2081-
; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 poison>
2082-
; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i32> [[X]], <i32 -43, i32 -43, i32 poison, i32 -43>
2083-
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> [[A]], <4 x i32> <i32 -1, i32 poison, i32 -1, i32 -1>
2067+
; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
20842068
; CHECK-NEXT: ret <4 x i32> [[R]]
20852069
;
20862070
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 poison>

0 commit comments

Comments
 (0)