diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index eb4332fbc0959..e55b77a771859 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1521,10 +1521,17 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
              m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
     return nullptr;
 
-  if (!isa<SelectInst>(Sel1)) {
-    Pred0 = ICmpInst::getInversePredicate(Pred0);
-    std::swap(X, Sel1);
-  }
+  auto SwapSelectOperands = [](ICmpInst::Predicate &Pred, Value *&Op0,
+                               Value *&Op1) -> void {
+    std::swap(Op0, Op1);
+    Pred = ICmpInst::getInversePredicate(Pred);
+  };
+
+  if (!isa<SelectInst>(Sel1))
+    SwapSelectOperands(Pred0, Sel1, X);
+
+  if (!isa<SelectInst>(Sel1) && !isa<SExtInst>(Sel1))
+    SwapSelectOperands(Pred0, Sel1, X);
 
   // Canonicalize Cmp0 into ult or uge.
   // FIXME: we shouldn't care about lanes that are 'undef' in the end?
@@ -1575,17 +1582,26 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
                         m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
     return nullptr;
 
+  // ReplacementLow/ReplacementHigh are created later for the SExtICmp case.
   Value *Cmp1;
   CmpPredicate Pred1;
   Constant *C2;
   Value *ReplacementLow, *ReplacementHigh;
-  if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
-                            m_Value(ReplacementHigh))) ||
+  bool FoldSExtICmp = match(
+      Sel1, m_SExt(m_CombineAnd(m_Value(Cmp1), m_ICmp(m_Value(), m_Value()))));
+  if (!(FoldSExtICmp ||
+        match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
+                             m_Value(ReplacementHigh)))) ||
       !match(Cmp1,
              m_ICmp(Pred1, m_Specific(X),
                     m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))
     return nullptr;
 
+  // When folding sext(icmp), the fold is only profitable if C1 == 0, so that
+  // the clamp can be lowered to an `smax` intrinsic.
+  if (FoldSExtICmp && !C1->isZeroValue())
+    return nullptr;
+
   if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))
     return nullptr; // Not enough one-use instructions for the fold.
   // FIXME: this restriction could be relaxed if Cmp1 can be reused as one of
@@ -1595,6 +1611,10 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
   // FIXME: we shouldn't care about lanes that are 'undef' in the end?
   switch (Pred1) {
   case ICmpInst::Predicate::ICMP_SLT:
+    // The sext(icmp) case is only advantageous for SGT/SGE, since those allow
+    // the smax conversion.
+    if (FoldSExtICmp)
+      return nullptr;
     break;
   case ICmpInst::Predicate::ICMP_SLE:
     // We'd have to increment C2 by one, and for that it must not have signed
@@ -1644,6 +1664,11 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
   if (!Precond2 || !match(Precond2, m_One()))
     return nullptr;
 
+  if (FoldSExtICmp) {
+    ReplacementHigh = Constant::getAllOnesValue(Sel1->getType());
+    ReplacementLow = Constant::getNullValue(Sel1->getType());
+  }
+
   // If we are matching from a truncated input, we need to sext the
   // ReplacementLow and ReplacementHigh values. Only do the transform if they
   // are free to extend due to being constants.
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
new file mode 100644
index 0000000000000..062c7b477a44b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='instcombine' -S | FileCheck %s
+
+; Given a pattern like:
+; %old_cmp1 = icmp sgt i32 %x, C2
+; %old_replacement = sext i1 %old_cmp1 to i32
+; %old_cmp0 = icmp ult i32 %x, C0
+; %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
+; it can be rewritten as a more canonical pattern:
+; %new_cmp2 = icmp sge i32 %x, C0
+; %new_clamped_low = call i32 @llvm.smax.i32(i32 %x, i32 0)
+; %r = select i1 %new_cmp2, i32 -1, i32 %new_clamped_low
+; Iff 0 s<= C2 s<= C0
+; Also, ULT predicate can also be UGE; or UGT iff C0 != -1 (+invert result)
+; Also, SLT predicate can also be SGE; or SGT iff C2 != INT_MAX (+invert res.)
+
+;-------------------------------------------------------------------------------
+
+; clamp-like max case, can be optimized with max
+define i32 @clamp_max_sgt(i32 %x) {
+; CHECK-LABEL: @clamp_max_sgt(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
+; CHECK-NEXT:    [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
+; CHECK-NEXT:    ret i32 [[COND3]]
+;
+  %or.cond = icmp ult i32 %x, 256
+  %cmp2 = icmp sgt i32 %x, 0
+  %cond = sext i1 %cmp2 to i32
+  %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+  ret i32 %cond3
+}
+
+; clamp-like max case with vector, can be optimized with max
+define <2 x i32> @clamp_max_sgt_vec(<2 x i32> %x) {
+; CHECK-LABEL: @clamp_max_sgt_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 255)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X]], <2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[COND3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> [[TMP2]]
+; CHECK-NEXT:    ret <2 x i32> [[COND3]]
+;
+  %or.cond = icmp ult <2 x i32> %x, <i32 256, i32 256>
+  %cmp2 = icmp sgt <2 x i32> %x, <i32 0, i32 0>
+  %cond = sext <2 x i1> %cmp2 to <2 x i32>
+  %cond3 = select <2 x i1> %or.cond, <2 x i32> %x, <2 x i32> %cond
+  ret <2 x i32> %cond3
+}
+
+; Not clamp-like vector: the per-lane thresholds do not form a single clamp
+define <2 x i32> @clamp_max_vec(<2 x i32> %x) {
+; CHECK-LABEL: @clamp_max_vec(
+; CHECK-NEXT:    [[OR_COND:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 256, i32 128>
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 0, i32 300>
+; CHECK-NEXT:    [[COND:%.*]] = sext <2 x i1> [[CMP2]] to <2 x i32>
+; CHECK-NEXT:    [[COND3:%.*]] = select <2 x i1> [[OR_COND]], <2 x i32> [[X]], <2 x i32> [[COND]]
+; CHECK-NEXT:    ret <2 x i32> [[COND3]]
+;
+  %or.cond = icmp ult <2 x i32> %x, <i32 256, i32 128>
+  %cmp2 = icmp sgt <2 x i32> %x, <i32 0, i32 300>
+  %cond = sext <2 x i1> %cmp2 to <2 x i32>
+  %cond3 = select <2 x i1> %or.cond, <2 x i32> %x, <2 x i32> %cond
+  ret <2 x i32> %cond3
+}
+
+; clamp-like max case, can be optimized with max
+define i32 @clamp_max_sgt_neg1(i32 %x) {
+; CHECK-LABEL: @clamp_max_sgt_neg1(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
+; CHECK-NEXT:    [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
+; CHECK-NEXT:    ret i32 [[COND3]]
+;
+  %or.cond = icmp ult i32 %x, 256
+  %cmp2 = icmp sgt i32 %x, -1
+  %cond = sext i1 %cmp2 to i32
+  %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+  ret i32 %cond3
+}
+
+; clamp-like max case, can be optimized with max
+define i32 @clamp_max_sge(i32 %x) {
+; CHECK-LABEL: @clamp_max_sge(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
+; CHECK-NEXT:    [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
+; CHECK-NEXT:    ret i32 [[COND3]]
+;
+  %or.cond = icmp ult i32 %x, 256
+  %cmp2 = icmp sge i32 %x, 0
+  %cond = sext i1 %cmp2 to i32
+  %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+  ret i32 %cond3
+}
+
+; SLT cases are not supported; the fold needs 0 selected as the low value and -1 as the high value
+define i32 @clamp_max_slt(i32 %x) {
+; CHECK-LABEL: @clamp_max_slt(
+; CHECK-NEXT:    [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT:    [[COND:%.*]] = ashr i32 [[X]], 31
+; CHECK-NEXT:    [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT:    ret i32 [[COND3]]
+;
+  %or.cond = icmp ult i32 %x, 256
+  %cmp2 = icmp slt i32 %x, 0
+  %cond = sext i1 %cmp2 to i32
+  %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+  ret i32 %cond3
+}
+
+; SLE cases are not supported; the fold needs 0 selected as the low value and -1 as the high value
+define i32 @clamp_max_sle(i32 %x) {
+; CHECK-LABEL: @clamp_max_sle(
+; CHECK-NEXT:    [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[X]], 1
+; CHECK-NEXT:    [[COND:%.*]] = sext i1 [[CMP2]] to i32
+; CHECK-NEXT:    [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT:    ret i32 [[COND3]]
+;
+  %or.cond = icmp ult i32 %x, 256
+  %cmp2 = icmp sle i32 %x, 0
+  %cond = sext i1 %cmp2 to i32
+  %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+  ret i32 %cond3
+}
+
+; Not selecting between 0, x, and -1, so can't be optimized with max
+; Selects between 0, x, and 1 instead
+define i32 @clamp_max_bad_values(i32 %x) {
+; CHECK-LABEL: @clamp_max_bad_values(
+; CHECK-NEXT:    [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[CMP2]] to i32
+; CHECK-NEXT:    [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT:    ret i32 [[COND3]]
+;
+  %or.cond = icmp ult i32 %x, 256
+  %cmp2 = icmp sgt i32 %x, 0
+  %cond = zext i1 %cmp2 to i32
+  %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+  ret i32 %cond3
+}
+
+; The range does not start at 0 but at some positive integer, so the smax fold does not apply
+define i32 @clamp_max_offset(i32 %x) {
+; CHECK-LABEL: @clamp_max_offset(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -10
+; CHECK-NEXT:    [[OR_COND:%.*]] = icmp ult i32 [[TMP1]], 246
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[X]], 10
+; CHECK-NEXT:    [[COND:%.*]] = sext i1 [[CMP2]] to i32
+; CHECK-NEXT:    [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT:    ret i32 [[COND3]]
+;
+  %1 = add i32 %x, -10
+  %or.cond = icmp ult i32 %1, 246
+  %cmp2 = icmp sgt i32 %x, 10
+  %cond = sext i1 %cmp2 to i32
+  %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+  ret i32 %cond3
+}