Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 59 additions & 49 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,63 @@ void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
Known = CondRes;
}

// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
// Returns the input and lower/upper bounds.
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
const APInt *&CLow, const APInt *&CHigh) {
assert(isa<Operator>(Select) &&
cast<Operator>(Select)->getOpcode() == Instruction::Select &&
"Input should be a Select!");

const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
if (SPF != SPF_SMAX && SPF != SPF_SMIN)
return false;

if (!match(RHS, m_APInt(CLow)))
return false;

const Value *LHS2 = nullptr, *RHS2 = nullptr;
SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
if (getInverseMinMaxFlavor(SPF) != SPF2)
return false;

if (!match(RHS2, m_APInt(CHigh)))
return false;

if (SPF == SPF_SMIN)
std::swap(CLow, CHigh);

In = LHS2;
return CLow->sle(*CHigh);
}

static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
const APInt *&CLow,
const APInt *&CHigh) {
assert((II->getIntrinsicID() == Intrinsic::smin ||
II->getIntrinsicID() == Intrinsic::smax) &&
"Must be smin/smax");

Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
!match(II->getArgOperand(1), m_APInt(CLow)) ||
!match(InnerII->getArgOperand(1), m_APInt(CHigh)))
return false;

if (II->getIntrinsicID() == Intrinsic::smin)
std::swap(CLow, CHigh);
return CLow->sle(*CHigh);
}

static void unionWithMinMaxIntrinsicClamp(const IntrinsicInst *II,
KnownBits &Known) {
const APInt *CLow, *CHigh;
if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
Known = Known.unionWith(ConstantRange(*CLow, *CHigh).toKnownBits());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Known = Known.unionWith(ConstantRange(*CLow, *CHigh).toKnownBits());
Known = Known.unionWith(ConstantRange(*CLow, *CHigh + 1).toKnownBits());

*CHigh should be included by ConstantRange.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It produces a wrong KnownBits result 00000000000????? for smax(smin(X, 32), 0).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is an counterexample: https://alive2.llvm.org/ce/z/ji-_Jq

; bin/opt -passes=instcombine test.ll -S
define i16 @test_0a(i16 %x) {
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
  %and = and i16 %2, 31
  ret i16 %and
}

Output:

define i16 @test_0a(i16 %x) {
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
  ret i16 %2
}

%and should not be eliminated.

Please add this negative test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, my mistake. I also checked that this is ok if *CHigh is MaxSignedValue (in this case +1 loops to MinSignedValue and ConstantRange interprets this correctly). I added 3 more LIT tests: two to check that +1 is correct now and one to verify that MaxSignedValue is dealt with correctly.

}

static void computeKnownBitsFromOperator(const Operator *I,
const APInt &DemandedElts,
KnownBits &Known, unsigned Depth,
Expand Down Expand Up @@ -1804,11 +1861,13 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
Known = KnownBits::smin(Known, Known2);
unionWithMinMaxIntrinsicClamp(II, Known);
break;
case Intrinsic::smax:
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
Known = KnownBits::smax(Known, Known2);
unionWithMinMaxIntrinsicClamp(II, Known);
break;
case Intrinsic::ptrmask: {
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
Expand Down Expand Up @@ -3751,55 +3810,6 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2,
return false;
}

// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
// Returns the input and lower/upper bounds.
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
const APInt *&CLow, const APInt *&CHigh) {
assert(isa<Operator>(Select) &&
cast<Operator>(Select)->getOpcode() == Instruction::Select &&
"Input should be a Select!");

const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
if (SPF != SPF_SMAX && SPF != SPF_SMIN)
return false;

if (!match(RHS, m_APInt(CLow)))
return false;

const Value *LHS2 = nullptr, *RHS2 = nullptr;
SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
if (getInverseMinMaxFlavor(SPF) != SPF2)
return false;

if (!match(RHS2, m_APInt(CHigh)))
return false;

if (SPF == SPF_SMIN)
std::swap(CLow, CHigh);

In = LHS2;
return CLow->sle(*CHigh);
}

static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
const APInt *&CLow,
const APInt *&CHigh) {
assert((II->getIntrinsicID() == Intrinsic::smin ||
II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");

Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
!match(II->getArgOperand(1), m_APInt(CLow)) ||
!match(InnerII->getArgOperand(1), m_APInt(CHigh)))
return false;

if (II->getIntrinsicID() == Intrinsic::smin)
std::swap(CLow, CHigh);
return CLow->sle(*CHigh);
}

/// For vector constants, loop over the elements and find the constant with the
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
Expand Down
205 changes: 205 additions & 0 deletions llvm/test/Analysis/ValueTracking/knownbits-trunc-with-min-max-clamp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=aggressive-instcombine -mtriple=x86_64 -S | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
; RUN: opt < %s -passes=aggressive-instcombine -mtriple=x86_64 -S | FileCheck %s
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

Please add an appropriate data layout instead, something like target datalayout = "n:32:16:8" probably works.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added target datalayout = "n8:16:32", removed -mtriple.


; This LIT test checks if TruncInstCombine pass correctly recognizes the
; constraints from a signed min-max clamp. The clamp is a sequence of smin and
; smax instructions limiting a variable into a range, smin <= x <= smax.
;
; Each LIT test (except the last one) has two versions depending on the order
; of smin and smax:
; a) y = smax(smin(x, upper_limit), lower_limit)
; b) y = smin(smax(x, lower_limit), upper_limit)

define i8 @test_0a(i16 %x) {
; CHECK-LABEL: define i8 @test_0a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
%2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
%a = sext i16 %2 to i32
%b = lshr i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define i8 @test_0b(i16 %x) {
; CHECK-LABEL: define i8 @test_0b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 0)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smax.i16(i16 %x, i16 0)
%2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
%a = sext i16 %2 to i32
%b = lshr i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define i8 @test_1a(i16 %x) {
; CHECK-LABEL: define i8 @test_1a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
%2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
%a = sext i16 %2 to i32
%b = add i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define i8 @test_1b(i16 %x) {
; CHECK-LABEL: define i8 @test_1b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 0)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smax.i16(i16 %x, i16 0)
%2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
%a = sext i16 %2 to i32
%b = add i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define i8 @test_2a(i16 %x) {
; CHECK-LABEL: define i8 @test_2a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
%2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
%a = sext i16 %2 to i32
%b = add i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define i8 @test_2b(i16 %x) {
; CHECK-LABEL: define i8 @test_2b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
%2 = tail call i16 @llvm.smin.i16(i16 %1, i16 -1)
%a = sext i16 %2 to i32
%b = add i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define i8 @test_3a(i16 %x) {
; CHECK-LABEL: define i8 @test_3a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
%2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
%a = sext i16 %2 to i32
%b = add i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define i8 @test_3b(i16 %x) {
; CHECK-LABEL: define i8 @test_3b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
%1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
%2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
%a = sext i16 %2 to i32
%b = add i32 %a, 2
%b.trunc = trunc i32 %b to i8
ret i8 %b.trunc
}

define <16 x i8> @test_vec_1a(<16 x i16> %x) {
; CHECK-LABEL: define <16 x i8> @test_vec_1a(
; CHECK-SAME: <16 x i16> [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[X]], <16 x i16> splat (i16 127))
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[TMP1]], <16 x i16> zeroinitializer)
; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
; CHECK-NEXT: [[B:%.*]] = add <16 x i8> [[A]], splat (i8 2)
; CHECK-NEXT: ret <16 x i8> [[B]]
;
%1 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %x, <16 x i16> splat (i16 127))
%2 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %1, <16 x i16> zeroinitializer)
%a = sext <16 x i16> %2 to <16 x i32>
%b = add <16 x i32> %a, splat (i32 2)
%b.trunc = trunc <16 x i32> %b to <16 x i8>
ret <16 x i8> %b.trunc
}

define <16 x i8> @test_vec_1b(<16 x i16> %x) {
; CHECK-LABEL: define <16 x i8> @test_vec_1b(
; CHECK-SAME: <16 x i16> [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[X]], <16 x i16> zeroinitializer)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[TMP1]], <16 x i16> splat (i16 127))
; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
; CHECK-NEXT: [[B:%.*]] = add <16 x i8> [[A]], splat (i8 2)
; CHECK-NEXT: ret <16 x i8> [[B]]
;
%1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %x, <16 x i16> zeroinitializer)
%2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
%a = sext <16 x i16> %2 to <16 x i32>
%b = add <16 x i32> %a, splat (i32 2)
%b.trunc = trunc <16 x i32> %b to <16 x i8>
ret <16 x i8> %b.trunc
}

define i8 @test_final(i16 %x, i16 %y) {
; CHECK-LABEL: define i8 @test_final(
; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 127)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.smax.i16(i16 [[Y]], i16 0)
; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP3]], i16 127)
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[MUL]], 7
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i16 [[SHR]] to i8
; CHECK-NEXT: ret i8 [[TRUNC]]
;
%1 = tail call i16 @llvm.smin.i16(i16 %x, i16 127)
%2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
%x.clamp = zext nneg i16 %2 to i32
%3 = tail call i16 @llvm.smax.i16(i16 %y, i16 0)
%4 = tail call i16 @llvm.smin.i16(i16 %3, i16 127)
%y.clamp = zext nneg i16 %4 to i32
%mul = mul nuw nsw i32 %x.clamp, %y.clamp
%shr = lshr i32 %mul, 7
%trunc= trunc nuw nsw i32 %shr to i8
ret i8 %trunc
}
Loading