Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,59 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
return nullptr;
}

/// Try to fold a select to a min/max intrinsic. Many cases are already handled
/// by matchDecomposedSelectPattern but here we handle the cases where more
/// exensive modification of the IR is required.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extensive?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

static Value *foldSelectICmpMinMax(const ICmpInst *Cmp, Value *TVal,
Value *FVal,
InstCombiner::BuilderTy &Builder,
const SimplifyQuery &SQ) {
const Value *CmpLHS = Cmp->getOperand(0);
const Value *CmpRHS = Cmp->getOperand(1);
ICmpInst::Predicate Pred = Cmp->getPredicate();

// (X > Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
// (X < Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
// This transformation is valid when overflow corresponding to the sign of
// the comparison is poison and we must drop the non-matching overflow flag.
if (CmpRHS == TVal) {
std::swap(CmpLHS, CmpRHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}

// TODO: consider handeling 'or disjoint' as well, though these would need to
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO: consider handeling 'or disjoint' as well, though these would need to
// TODO: consider handling 'or disjoint' as well, though these would need to

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

// be converted to 'add' instructions.
if (CmpLHS == TVal && isa<Instruction>(FVal)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Early exit?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. Updated.

if (Pred == CmpInst::ICMP_SGT &&
match(FVal, m_NSWAdd(m_Specific(CmpRHS), m_One()))) {
cast<Instruction>(FVal)->setHasNoUnsignedWrap(false);
return Builder.CreateBinaryIntrinsic(Intrinsic::smax, TVal, FVal);
}

if (Pred == CmpInst::ICMP_SLT &&
match(FVal, m_NSWAdd(m_Specific(CmpRHS), m_AllOnes()))) {
cast<Instruction>(FVal)->setHasNoUnsignedWrap(false);
return Builder.CreateBinaryIntrinsic(Intrinsic::smin, TVal, FVal);
}

if (Pred == CmpInst::ICMP_UGT &&
match(FVal, m_NUWAdd(m_Specific(CmpRHS), m_One()))) {
cast<Instruction>(FVal)->setHasNoSignedWrap(false);
return Builder.CreateBinaryIntrinsic(Intrinsic::umax, TVal, FVal);
}

// Note: We must use isKnownNonZero here because "sub nuw %x, 1" will be
// canonicalize to "add %x, -1" discarding the nuw flag.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// canonicalize to "add %x, -1" discarding the nuw flag.
// canonicalized to "add %x, -1" discarding the nuw flag.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

if (Pred == CmpInst::ICMP_ULT &&
match(FVal, m_Add(m_Specific(CmpRHS), m_AllOnes())) &&
isKnownNonZero(CmpRHS, SQ)) {
cast<Instruction>(FVal)->setHasNoSignedWrap(false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case the nuw flag is also invalid: https://alive2.llvm.org/ce/z/ZpSaKv

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated. In practice, I'm not sure we could ever reach this point in that case, at least I was not able to construct a test. But I agree it is good to stay on the safe side and not rely on unrelated optimizations to prevent a mis-compilation here.

return Builder.CreateBinaryIntrinsic(Intrinsic::umin, TVal, FVal);
}
}
return nullptr;
}

/// We want to turn:
/// (select (icmp eq (and X, Y), 0), (and (lshr X, Z), 1), 1)
/// into:
Expand Down Expand Up @@ -1917,6 +1970,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
return &SI;
}

if (Value *V = foldSelectICmpMinMax(ICI, TrueVal, FalseVal, Builder, SQ))
return replaceInstUsesWith(SI, V);

if (Instruction *V =
foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
return V;
Expand Down
244 changes: 244 additions & 0 deletions llvm/test/Transforms/InstCombine/minmax-fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1598,3 +1598,247 @@ define <2 x i32> @test_umax_smax_vec_neg(<2 x i32> %x) {
%umax = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %smax, <2 x i32> <i32 1, i32 10>)
ret <2 x i32> %umax
}

; Positive test: (x <s w) ? x : (w - 1), with the decrement written as
; "add nsw %w, -1". Expected: the select becomes a call to llvm.smin and the
; add keeps its nsw flag.
define i32 @test_smin_sub1_nsw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smin_sub1_nsw(
; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp slt i32 %x, %w
%sub = add nsw i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}

; Positive test: (x >s w) ? x : (w + 1) with "add nsw". Expected: the select
; becomes a call to llvm.smax and the add keeps its nsw flag.
define i32 @test_smax_add1_nsw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smax_add1_nsw(
; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[X2]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp sgt i32 %x, %w
%add = add nsw i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; Positive test: (x >u w) ? x : (w + 1) with "add nuw". Expected: the select
; becomes a call to llvm.umax and the add keeps its nuw flag.
define i32 @test_umax_add1_nuw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_umax_add1_nuw(
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ugt i32 %x, %w
%add = add nuw i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; Positive test: (x <u w) ? x : (w - 1), with the decrement written as a
; flag-less "add %w, -1". %w carries range(i32 1, 0), a wrapped range that
; excludes only 0, so %w can be proven non-zero. Expected: the select becomes
; a call to llvm.umin.
define i32 @test_umin_sub1_nuw(i32 %x, i32 range(i32 1, 0) %w) {
; CHECK-LABEL: @test_umin_sub1_nuw(
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ult i32 %x, %w
%sub = add i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}

; Same pattern as the smin test but with the compare operands commuted
; (w >s x instead of x <s w). Expected: still folds to llvm.smin with the
; nsw flag kept on the add.
define i32 @test_smin_sub1_nsw_swapped(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smin_sub1_nsw_swapped(
; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp sgt i32 %w, %x
%sub = add nsw i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}

; Same pattern as the smax test but with the compare operands commuted
; (w <s x instead of x >s w). Expected: still folds to llvm.smax with the
; nsw flag kept on the add.
define i32 @test_smax_add1_nsw_swapped(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smax_add1_nsw_swapped(
; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[X2]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp slt i32 %w, %x
%add = add nsw i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; Same pattern as the umax test but with the compare operands commuted
; (w <u x instead of x >u w). Expected: still folds to llvm.umax with the
; nuw flag kept on the add.
define i32 @test_umax_add1_nuw_swapped(i32 %x, i32 %w) {
; CHECK-LABEL: @test_umax_add1_nuw_swapped(
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ult i32 %w, %x
%add = add nuw i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; Same pattern as the umin test but with the compare operands commuted
; (w >u x instead of x <u w). %w is again restricted to non-zero values via
; range(i32 1, 0). Expected: still folds to llvm.umin.
define i32 @test_umin_sub1_nuw_swapped(i32 %x, i32 range(i32 1, 0) %w) {
; CHECK-LABEL: @test_umin_sub1_nuw_swapped(
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ugt i32 %w, %x
%sub = add i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}

; Vector variant of the smin pattern: <2 x i16> operands with a splat -1
; addend. Expected: folds to llvm.smin.v2i16 with nsw kept on the add.
define <2 x i16> @test_smin_sub1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
; CHECK-LABEL: @test_smin_sub1_nsw_vec(
; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 -1)
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[SUB]])
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%cmp = icmp slt <2 x i16> %x, %w
%sub = add nsw <2 x i16> %w, splat (i16 -1)
%r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %sub
ret <2 x i16> %r
}

; Vector variant of the smax pattern: <2 x i16> operands with a splat 1
; addend. Expected: folds to llvm.smax.v2i16 with nsw kept on the add.
define <2 x i16> @test_smax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
; CHECK-LABEL: @test_smax_add1_nsw_vec(
; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 1)
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[ADD]])
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%cmp = icmp sgt <2 x i16> %x, %w
%add = add nsw <2 x i16> %w, splat (i16 1)
%r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %add
ret <2 x i16> %r
}

; Vector variant of the umax pattern: <2 x i16> operands with a splat 1
; addend. Expected: folds to llvm.umax.v2i16 with nuw kept on the add.
define <2 x i16> @test_umax_add1_nuw_vec(<2 x i16> %x, <2 x i16> %w) {
; CHECK-LABEL: @test_umax_add1_nuw_vec(
; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i16> [[W:%.*]], splat (i16 1)
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[ADD]])
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%cmp = icmp ugt <2 x i16> %x, %w
%add = add nuw <2 x i16> %w, splat (i16 1)
%r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %add
ret <2 x i16> %r
}

; Vector variant of the umin pattern: <2 x i16> operands with a splat -1
; addend and %w restricted to non-zero elements via range(i16 1, 0).
; Expected: folds to llvm.umin.v2i16.
define <2 x i16> @test_umin_sub1_nuw_vec(<2 x i16> %x, <2 x i16> range(i16 1, 0) %w) {
; CHECK-LABEL: @test_umin_sub1_nuw_vec(
; CHECK-NEXT: [[SUB:%.*]] = add <2 x i16> [[W:%.*]], splat (i16 -1)
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[SUB]])
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%cmp = icmp ult <2 x i16> %x, %w
%sub = add <2 x i16> %w, splat (i16 -1)
%r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %sub
ret <2 x i16> %r
}


; smin pattern where the add carries both nsw and nuw.
; NOTE(review): the expected output is a plain select against the constant -1
; rather than an llvm.smin call, so this input does not appear to exercise
; flag dropping in the min/max fold. Presumably "add nuw %w, -1" is only
; non-poison for %w == 0 and is simplified to -1 beforehand -- confirm.
define i32 @test_smin_sub1_nsw_drop_flags(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smin_sub1_nsw_drop_flags(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp slt i32 %x, %w
%sub = add nsw nuw i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}

; smax pattern where the add carries both nsw and nuw. Expected: folds to
; llvm.smax, and the add in the output keeps nsw but no longer has nuw.
define i32 @test_smax_add1_nsw_drop_flags(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smax_add1_nsw_drop_flags(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[W:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[ADD]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp sgt i32 %x, %w
%add = add nsw nuw i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; umax pattern where the add carries both nuw and nsw. Expected: folds to
; llvm.umax, and the add in the output keeps nuw but no longer has nsw.
define i32 @test_umax_add1_nuw_drop_flags(i32 %x, i32 %w) {
; CHECK-LABEL: @test_umax_add1_nuw_drop_flags(
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ugt i32 %x, %w
%add = add nuw nsw i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; umin pattern where the add carries nsw and %w is restricted to non-zero
; values via range(i32 1, 0). Expected: folds to llvm.umin, and the add in the
; output has no wrap flags (nsw is dropped).
define i32 @test_umin_sub1_nuw_drop_flags(i32 %x, i32 range(i32 1, 0) %w) {
; CHECK-LABEL: @test_umin_sub1_nuw_drop_flags(
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ult i32 %x, %w
%sub = add nsw i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}

;; Confirm we don't crash on these cases.
; Negative test: the false arm is "or disjoint %w, -1" instead of an add.
; Expected: no min/max intrinsic is formed; the select remains, with the or
; replaced by the constant -1.
define i32 @test_smin_or_neg1_nsw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smin_or_neg1_nsw(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp slt i32 %x, %w
%sub = or disjoint i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}

; Negative test: the false arm is "or disjoint %w, 1" instead of an add.
; Expected: the compare, or and select are left as they are; no llvm.smax
; call is formed.
define i32 @test_smax_or_1_nsw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smax_or_1_nsw(
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[W:%.*]]
; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[W]], 1
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[ADD]]
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp sgt i32 %x, %w
%add = or disjoint i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; Negative test: unsigned variant with "or disjoint %w, 1" as the false arm.
; Expected: the compare, or and select are left as they are; no llvm.umax
; call is formed.
define i32 @test_umax_or_1_nuw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_umax_or_1_nuw(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[W:%.*]]
; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[W]], 1
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[ADD]]
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ugt i32 %x, %w
%add = or disjoint i32 %w, 1
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}

; Negative test: unsigned variant with "or disjoint %w, -1" as the false arm
; and %w restricted to non-zero values via range(i32 1, 0). Expected: no
; llvm.umin call is formed; the select remains, with the or replaced by the
; constant -1.
define i32 @test_umin_or_neg1_nuw(i32 %x, i32 range(i32 1, 0) %w) {
; CHECK-LABEL: @test_umin_or_neg1_nuw(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[W:%.*]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ult i32 %x, %w
%sub = or disjoint i32 %w, -1
%r = select i1 %cmp, i32 %x, i32 %sub
ret i32 %r
}