Skip to content

Commit cdfaef9

Browse files
committed
[InstCombine] When canonicalizing clamp-like patterns, also consider certain sgt/slt cases
In particular, when %target_low=0 and %target_high=-1 and C1=0,

```
%old_cmp1 = icmp slt %x, C2
%old_replacement = select %old_cmp1, %target_low, %target_high
```

might have already been combined into

```
%old_cmp1 = icmp sgt %x, C2
%old_replacement = sext %old_cmp1
```

For this particular case, the canonicalization allows a more optimized sequence utilizing `max` to be created.

```
%old_cmp1 = icmp sgt %x, C2
%old_replacement = sext %old_cmp1
%old_cmp0 = icmp ult i32 %x, C0
%r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
```

If 0 s<= C2 s<= C0, this can be re-written as:

```
%new_cmp1 = icmp slt i32 %x, 0
%new_cmp2 = icmp sge i32 %x, C0
%new_clamped_low = select i1 %new_cmp1, i32 0, i32 %x
%r = select i1 %new_cmp2, i32 -1, i32 %new_clamped_low
```

which can in turn be re-written as (this already happens starting from the canonicalized version):

```
%clamped_low = max i32 %x, 0
%new_cmp2 = icmp sge i32 %x, C0
%sext = sext i1 %new_cmp2
%r = or i32 %sext, %clamped_low
```
1 parent 46767d0 commit cdfaef9

File tree

2 files changed

+52
-23
lines changed

2 files changed

+52
-23
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,10 +1521,17 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
15211521
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
15221522
return nullptr;
15231523

1524-
if (!isa<SelectInst>(Sel1)) {
1525-
Pred0 = ICmpInst::getInversePredicate(Pred0);
1526-
std::swap(X, Sel1);
1527-
}
1524+
auto SwapSelectOperands = [](ICmpInst::Predicate &Pred, Value *&Op0,
1525+
Value *&Op1) -> void {
1526+
std::swap(Op0, Op1);
1527+
Pred = ICmpInst::getInversePredicate(Pred);
1528+
};
1529+
1530+
if (!isa<SelectInst>(Sel1))
1531+
SwapSelectOperands(Pred0, Sel1, X);
1532+
1533+
if (!isa<SelectInst>(Sel1) && !isa<SExtInst>(Sel1))
1534+
SwapSelectOperands(Pred0, Sel1, X);
15281535

15291536
// Canonicalize Cmp0 into ult or uge.
15301537
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
@@ -1575,26 +1582,44 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
15751582
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
15761583
return nullptr;
15771584

1585+
// Will create Replacement[Low/High] later for SExtICmp case
15781586
Value *Cmp1;
15791587
CmpPredicate Pred1;
15801588
Constant *C2;
15811589
Value *ReplacementLow, *ReplacementHigh;
1582-
if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
1583-
m_Value(ReplacementHigh))) ||
1590+
bool FoldSExtICmp;
1591+
auto MatchSExtICmp = [](Value *PossibleSextIcmp, Value *&Cmp1) -> bool {
1592+
Value *ICmpOp0, *ICmpOp1;
1593+
return match(PossibleSextIcmp, m_SExt(m_Value(Cmp1))) &&
1594+
match(Cmp1, m_ICmp(m_Value(ICmpOp0), m_Value(ICmpOp1)));
1595+
};
1596+
if (!((FoldSExtICmp = MatchSExtICmp(Sel1, Cmp1)) ||
1597+
match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
1598+
m_Value(ReplacementHigh)))) ||
15841599
!match(Cmp1,
15851600
m_ICmp(Pred1, m_Specific(X),
15861601
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))
15871602
return nullptr;
15881603

1604+
// When folding sext-icmp, only efficient if C1 = 0 so we can make use of the
1605+
// `smax` instruction
1606+
if (FoldSExtICmp && !C1->isZeroValue())
1607+
return nullptr;
1608+
15891609
if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))
15901610
return nullptr; // Not enough one-use instructions for the fold.
15911611
// FIXME: this restriction could be relaxed if Cmp1 can be reused as one of
15921612
// two comparisons we'll need to build.
15931613

15941614
// Canonicalize Cmp1 into the form we expect.
15951615
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
1616+
bool SwapReplacement = false;
15961617
switch (Pred1) {
15971618
case ICmpInst::Predicate::ICMP_SLT:
1619+
// The sext(icmp) case is only advantageous for SGT/SGE since that enables
1620+
// max conversion
1621+
if (FoldSExtICmp)
1622+
return nullptr;
15981623
break;
15991624
case ICmpInst::Predicate::ICMP_SLE:
16001625
// We'd have to increment C2 by one, and for that it must not have signed
@@ -1615,7 +1640,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
16151640
// Also non-canonical, but here we don't need to change C2,
16161641
// so we don't have any restrictions on C2, so we can just handle it.
16171642
Pred1 = ICmpInst::Predicate::ICMP_SLT;
1618-
std::swap(ReplacementLow, ReplacementHigh);
1643+
SwapReplacement = true;
16191644
break;
16201645
default:
16211646
return nullptr; // Unknown predicate.
@@ -1644,6 +1669,14 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
16441669
if (!Precond2 || !match(Precond2, m_One()))
16451670
return nullptr;
16461671

1672+
if (FoldSExtICmp) {
1673+
ReplacementLow = Constant::getAllOnesValue(Sel1->getType());
1674+
ReplacementHigh = Constant::getNullValue(Sel1->getType());
1675+
}
1676+
1677+
if (SwapReplacement)
1678+
std::swap(ReplacementLow, ReplacementHigh);
1679+
16471680
// If we are matching from a truncated input, we need to sext the
16481681
// ReplacementLow and ReplacementHigh values. Only do the transform if they
16491682
// are free to extend due to being constants.

llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@
1919
; clamp-like max case, can be optimized with max
2020
define i32 @clamp_max_sgt(i32 %x) {
2121
; CHECK-LABEL: @clamp_max_sgt(
22-
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
23-
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0
24-
; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
25-
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
22+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
23+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
24+
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
2625
; CHECK-NEXT: ret i32 [[COND3]]
2726
;
2827
%or.cond = icmp ult i32 %x, 256
@@ -35,10 +34,9 @@ define i32 @clamp_max_sgt(i32 %x) {
3534
; clamp-like max case with vector, can be optimized with max
3635
define <2 x i32> @clamp_max_sgt_vec(<2 x i32> %x) {
3736
; CHECK-LABEL: @clamp_max_sgt_vec(
38-
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 100, i32 256>
39-
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 98, i32 254>
40-
; CHECK-NEXT: [[COND:%.*]] = sext <2 x i1> [[CMP2]] to <2 x i32>
41-
; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[OR_COND]], <2 x i32> [[X]], <2 x i32> [[COND]]
37+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 99, i32 255>
38+
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X]], <2 x i32> zeroinitializer)
39+
; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> [[TMP2]]
4240
; CHECK-NEXT: ret <2 x i32> [[COND3]]
4341
;
4442
%or.cond = icmp ult <2 x i32> %x, <i32 100, i32 256>
@@ -67,10 +65,9 @@ define <2 x i32> @clamp_max_vec(<2 x i32> %x) {
6765
; clamp-like max case, can be optimized with max
6866
define i32 @clamp_max_sgt_neg1(i32 %x) {
6967
; CHECK-LABEL: @clamp_max_sgt_neg1(
70-
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
71-
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], -1
72-
; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
73-
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
68+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
69+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
70+
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
7471
; CHECK-NEXT: ret i32 [[COND3]]
7572
;
7673
%or.cond = icmp ult i32 %x, 256
@@ -83,10 +80,9 @@ define i32 @clamp_max_sgt_neg1(i32 %x) {
8380
; clamp-like max case, can be optimized with max
8481
define i32 @clamp_max_sge(i32 %x) {
8582
; CHECK-LABEL: @clamp_max_sge(
86-
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
87-
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], -1
88-
; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
89-
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
83+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
84+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
85+
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
9086
; CHECK-NEXT: ret i32 [[COND3]]
9187
;
9288
%or.cond = icmp ult i32 %x, 256

0 commit comments

Comments
 (0)