Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1521,10 +1521,17 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
return nullptr;

if (!isa<SelectInst>(Sel1)) {
Pred0 = ICmpInst::getInversePredicate(Pred0);
std::swap(X, Sel1);
}
auto SwapSelectOperands = [](ICmpInst::Predicate &Pred, Value *&Op0,
Value *&Op1) -> void {
std::swap(Op0, Op1);
Pred = ICmpInst::getInversePredicate(Pred);
};

if (!isa<SelectInst>(Sel1))
SwapSelectOperands(Pred0, Sel1, X);

if (!isa<SelectInst>(Sel1) && !isa<SExtInst>(Sel1))
SwapSelectOperands(Pred0, Sel1, X);

// Canonicalize Cmp0 into ult or uge.
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
Expand Down Expand Up @@ -1575,17 +1582,26 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
return nullptr;

// Will create Replacement[Low/High] later for SExtICmp case
Value *Cmp1;
CmpPredicate Pred1;
Constant *C2;
Value *ReplacementLow, *ReplacementHigh;
if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
m_Value(ReplacementHigh))) ||
bool FoldSExtICmp =
match(Sel1, m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
if (!(FoldSExtICmp ||
match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
m_Value(ReplacementHigh)))) ||
!match(Cmp1,
m_ICmp(Pred1, m_Specific(X),
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))
return nullptr;

// When folding sext(icmp), the transform is only profitable if C1 == 0, since
// that is what allows the result to be expressed with `smax`.
if (FoldSExtICmp && !C1->isZeroValue())
return nullptr;

if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))
return nullptr; // Not enough one-use instructions for the fold.
// FIXME: this restriction could be relaxed if Cmp1 can be reused as one of
Expand All @@ -1595,6 +1611,10 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
switch (Pred1) {
case ICmpInst::Predicate::ICMP_SLT:
// The sext(icmp) case is only advantageous for SGT/SGE, since those are the
// predicates that enable the max conversion.
if (FoldSExtICmp)
return nullptr;
break;
case ICmpInst::Predicate::ICMP_SLE:
// We'd have to increment C2 by one, and for that it must not have signed
Expand Down Expand Up @@ -1644,6 +1664,11 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!Precond2 || !match(Precond2, m_One()))
return nullptr;

if (FoldSExtICmp) {
ReplacementHigh = Constant::getAllOnesValue(Sel1->getType());
ReplacementLow = Constant::getNullValue(Sel1->getType());
}

// If we are matching from a truncated input, we need to sext the
// ReplacementLow and ReplacementHigh values. Only do the transform if they
// are free to extend due to being constants.
Expand Down
159 changes: 159 additions & 0 deletions llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s

; Given a pattern like:
; %old_cmp1 = icmp sgt i32 %x, C2
; %old_replacement = sext i1 %old_cmp1 to i32
; %old_cmp0 = icmp ult i32 %x, C0
; %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
; it can be rewritten as a more canonical pattern:
; %new_cmp2 = icmp sge i32 %x, C0
; %new_clamped_low = smax i32 %target_low, i32 %x
; %r = select i1 %new_cmp2, i32 -1, i32 %new_clamped_low
; Iff 0 s<= C2 s<= C0
; Also, ULT predicate can also be UGE; or UGT iff C0 != -1 (+invert result)
; Also, SLT predicate can also be SGE; or SGT iff C2 != INT_MAX (+invert res.)

;-------------------------------------------------------------------------------

; Positive test (scalar, `sgt 0`): %x is kept when %x u< 256; otherwise
; sext(%x s> 0) yields -1 for %x s> 255 and 0 for negative %x.
; Expected result: select(%x s> 255, -1, smax(%x, 0)).
define i32 @clamp_max_sgt(i32 %x) {
; CHECK-LABEL: @clamp_max_sgt(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
%cmp2 = icmp sgt i32 %x, 0
%cond = sext i1 %cmp2 to i32
%cond3 = select i1 %or.cond, i32 %x, i32 %cond
ret i32 %cond3
}

; Positive test (vector, non-splat constants): in every lane the sext
; threshold (98, 254) lies below the unsigned bound (100, 256), so each lane is
; still clamp-like.
; Expected result: select(%x s> <99, 255>, splat(-1), smax(%x, 0)).
define <2 x i32> @clamp_max_sgt_vec(<2 x i32> %x) {
; CHECK-LABEL: @clamp_max_sgt_vec(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 99, i32 255>
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X]], <2 x i32> zeroinitializer)
; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> [[TMP2]]
; CHECK-NEXT: ret <2 x i32> [[COND3]]
;
%or.cond = icmp ult <2 x i32> %x, <i32 100, i32 256>
%cmp2 = icmp sgt <2 x i32> %x, <i32 98, i32 254>
%cond = sext <2 x i1> %cmp2 to <2 x i32>
%cond3 = select <2 x i1> %or.cond, <2 x i32> %x, <2 x i32> %cond
ret <2 x i32> %cond3
}

; Negative test (vector): lane 0 has a sext threshold (128) above the unsigned
; bound (100), so for %x in [100, 128] that lane produces 0 instead of -1 and
; the pattern is not clamp-like.
; Expected result: IR is left unchanged.
define <2 x i32> @clamp_max_vec(<2 x i32> %x) {
; CHECK-LABEL: @clamp_max_vec(
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 100, i32 256>
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 128, i32 0>
; CHECK-NEXT: [[COND:%.*]] = sext <2 x i1> [[CMP2]] to <2 x i32>
; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[OR_COND]], <2 x i32> [[X]], <2 x i32> [[COND]]
; CHECK-NEXT: ret <2 x i32> [[COND3]]
;
%or.cond = icmp ult <2 x i32> %x, <i32 100, i32 256>
%cmp2 = icmp sgt <2 x i32> %x, <i32 128, i32 0>
%cond = sext <2 x i1> %cmp2 to <2 x i32>
%cond3 = select <2 x i1> %or.cond, <2 x i32> %x, <2 x i32> %cond
ret <2 x i32> %cond3
}

; Positive test (scalar, `sgt -1`): `%x s> -1` is the same condition as
; `%x s>= 0`, so this is still a clamp to 0 below and -1 above the range.
; Expected result: select(%x s> 255, -1, smax(%x, 0)).
define i32 @clamp_max_sgt_neg1(i32 %x) {
; CHECK-LABEL: @clamp_max_sgt_neg1(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
%cmp2 = icmp sgt i32 %x, -1
%cond = sext i1 %cmp2 to i32
%cond3 = select i1 %or.cond, i32 %x, i32 %cond
ret i32 %cond3
}

; Positive test (scalar, `sge 0`): same clamp as the `sgt` variants, written
; with a non-strict signed comparison.
; Expected result: select(%x s> 255, -1, smax(%x, 0)).
define i32 @clamp_max_sge(i32 %x) {
; CHECK-LABEL: @clamp_max_sge(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
%cmp2 = icmp sge i32 %x, 0
%cond = sext i1 %cmp2 to i32
%cond3 = select i1 %or.cond, i32 %x, i32 %cond
ret i32 %cond3
}

; Negative test (`slt 0`): here the sext yields -1 for negative %x and 0 for
; %x above the range, which is the reverse of what the fold needs (0 as the low
; value, -1 as the high value).
; Expected result: sext(%x s< 0) becomes `ashr %x, 31`; the select is kept.
define i32 @clamp_max_slt(i32 %x) {
; CHECK-LABEL: @clamp_max_slt(
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
; CHECK-NEXT: [[COND:%.*]] = ashr i32 [[X]], 31
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
%cmp2 = icmp slt i32 %x, 0
%cond = sext i1 %cmp2 to i32
%cond3 = select i1 %or.cond, i32 %x, i32 %cond
ret i32 %cond3
}

; Negative test (`sle 0`): as with `slt`, the sext yields -1 on the low side and
; 0 on the high side, the reverse of what the fold needs (0 as the low value,
; -1 as the high value).
; Expected result: the compare is canonicalized to `slt 1`; the select is kept.
define i32 @clamp_max_sle(i32 %x) {
; CHECK-LABEL: @clamp_max_sle(
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], 1
; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
%cmp2 = icmp sle i32 %x, 0
%cond = sext i1 %cmp2 to i32
%cond3 = select i1 %or.cond, i32 %x, i32 %cond
ret i32 %cond3
}

; Negative test (`zext` instead of `sext`): the out-of-range value is
; zext(%x s> 0), i.e. 0 or 1 rather than 0 or -1, so the result selects between
; 0, %x and 1 and cannot be expressed as the smax-based clamp.
; Expected result: IR is left unchanged.
define i32 @clamp_max_bad_values(i32 %x) {
; CHECK-LABEL: @clamp_max_bad_values(
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0
; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP2]] to i32
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
%cmp2 = icmp sgt i32 %x, 0
%cond = zext i1 %cmp2 to i32
%cond3 = select i1 %or.cond, i32 %x, i32 %cond
ret i32 %cond3
}

; Negative test (offset range): the range check is performed on %x - 10, i.e.
; it accepts 10 <= %x < 256, so the lower boundary of the range is 10 rather
; than 0.
; Expected result: IR is left unchanged.
define i32 @clamp_max_offset(i32 %x) {
; CHECK-LABEL: @clamp_max_offset(
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -10
; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[TMP1]], 246
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 10
; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%1 = add i32 %x, -10
%or.cond = icmp ult i32 %1, 246
%cmp2 = icmp sgt i32 %x, 10
%cond = sext i1 %cmp2 to i32
%cond3 = select i1 %or.cond, i32 %x, i32 %cond
ret i32 %cond3
}