Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
return isFreeToInvert(V, WillInvertAllUses, Unused);
}

/// Checks whether V is known to be a low-bit mask or the complement of one.
/// If `Not` is true, returns true if V is a negative power of 2 or zero
/// (i.e. a ~Mask).
/// If `Not` is false, returns true if V is a Mask or zero, where a Mask is a
/// value satisfying ((V + 1) & V) == 0.
/// `Q` supplies the context (data layout, assumption cache, context
/// instruction, dominator tree) used by the power-of-two queries.
/// `Depth` is forwarded to those queries and defaults to 0.
bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
unsigned Depth = 0);

/// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
/// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
/// NOTE: for Instructions only!
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2780,6 +2780,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
return Res;

// Canonicalize (sub nuw Mask, X) -> (xor Mask, X)
if (I.hasNoUnsignedWrap() &&
isMaskOrZero(Op0, /*Not=*/false,
getSimplifyQuery().getWithInstruction(&I)))
return BinaryOperator::CreateXor(Op0, Op1);

return TryToNarrowDeduceFlags();
}

Expand Down
20 changes: 10 additions & 10 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4326,8 +4326,8 @@ Instruction *InstCombinerImpl::foldSelectICmp(CmpPredicate Pred, SelectInst *SI,
}

// Returns whether V is a Mask ((X + 1) & X == 0) or ~Mask (-Pow2OrZero)
static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
unsigned Depth = 0) {
bool InstCombiner::isMaskOrZero(const Value *V, bool Not,
const SimplifyQuery &Q, unsigned Depth) {
if (Not ? match(V, m_NegatedPower2OrZero()) : match(V, m_LowBitMaskOrZero()))
return true;
if (V->getType()->getScalarSizeInBits() == 1)
Expand Down Expand Up @@ -4381,14 +4381,14 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
case Instruction::Add:
// Pow2 - 1 is a Mask.
if (!Not && match(I->getOperand(1), m_AllOnes()))
return isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
Depth, Q.AC, Q.CxtI, Q.DT);
return ::isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
Depth, Q.AC, Q.CxtI, Q.DT);
break;
case Instruction::Sub:
// -Pow2 is a ~Mask.
if (Not && match(I->getOperand(0), m_Zero()))
return isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
Depth, Q.AC, Q.CxtI, Q.DT);
return ::isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
Depth, Q.AC, Q.CxtI, Q.DT);
break;
case Instruction::Call: {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
Expand Down Expand Up @@ -4497,13 +4497,13 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
if (match(Op0, m_c_And(m_Specific(Op1), m_Value(M)))) {
X = Op1;
// Look for: x & Mask pred x
if (isMaskOrZero(M, /*Not=*/false, Q)) {
if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
return !ICmpInst::isSigned(Pred) ||
(match(M, m_NonNegative()) || isKnownNonNegative(M, Q));
}

// Look for: x & ~Mask pred ~Mask
if (isMaskOrZero(X, /*Not=*/true, Q)) {
if (IC.isMaskOrZero(X, /*Not=*/true, Q)) {
return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, Q);
}
return false;
Expand All @@ -4513,7 +4513,7 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,

auto Check = [&]() {
// Look for: ~x | Mask == -1
if (isMaskOrZero(M, /*Not=*/false, Q)) {
if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
if (Value *NotX =
IC.getFreelyInverted(X, X->hasOneUse(), &IC.Builder)) {
X = NotX;
Expand All @@ -4531,7 +4531,7 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
match(Op0, m_OneUse(m_And(m_Value(X), m_Value(M))))) {
auto Check = [&]() {
// Look for: x & ~Mask == 0
if (isMaskOrZero(M, /*Not=*/true, Q)) {
if (IC.isMaskOrZero(M, /*Not=*/true, Q)) {
if (Value *NotM =
IC.getFreelyInverted(M, M->hasOneUse(), &IC.Builder)) {
M = NotM;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/sub-xor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ define i32 @xor_dominating_cond(i32 %x) {
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X:%.*]], 256
; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[A:%.*]] = sub nuw nsw i32 255, [[X]]
; CHECK-NEXT: [[A:%.*]] = xor i32 [[X]], 255
; CHECK-NEXT: ret i32 [[A]]
; CHECK: if.end:
; CHECK-NEXT: ret i32 [[X]]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/InstCombine/sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2204,9 +2204,9 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) {

define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) {
; CHECK-LABEL: @shrink_sub_from_constant_lowbits3(
; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 4)
; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 24), [[X0000]]
; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], splat (i8 3)
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 1)
; CHECK-NEXT: [[SUB:%.*]] = and <2 x i8> [[TMP1]], splat (i8 30)
; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[SUB]], splat (i8 3)
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%x0000 = shl <2 x i8> %x, <i8 4, i8 4> ; 4 low bits are known zero
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {

define <3 x i8> @shuf_sub(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sub(
; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> <i8 1, i8 poison, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 poison, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> <i32 0, i32 poison, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {

define <3 x i8> @shuf_sub(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sub(
; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> <i8 1, i8 poison, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 poison, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> <i32 0, i32 poison, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
Expand Down
13 changes: 7 additions & 6 deletions llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr noalias nocapture noundef readonly [[TMP0:%.*]], ptr noalias nocapture noundef writeonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28
; CHECK-NEXT: br label [[TMP4:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[TMP2]] ], [ [[VEC_IND_NEXT:%.*]], [[TMP4]] ]
; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[VEC_IND]], splat (i64 4294967295)
; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP6]], splat (i64 255)
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], <8 x i64> [[TMP3]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison)
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4
; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[TMP4]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,14 @@ define i32 @test_loop_idiom_recogize(i32 %x, i32 %y, ptr %lam, ptr %alp) nounwin
; CHECK-NEXT: Classifying expressions for: @test_loop_idiom_recogize
; CHECK-NEXT: %indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb1> U: [0,256) S: [0,256) Exits: 255 LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: %i.0.reg2mem.0 = sub nuw nsw i32 255, %indvar
; CHECK-NEXT: --> {255,+,-1}<nsw><%bb1> U: [0,256) S: [0,256) Exits: 0 LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: %i.0.reg2mem.0 = xor i32 %indvar, 255
; CHECK-NEXT: --> %i.0.reg2mem.0 U: [0,-2147483648) S: [0,-2147483648) Exits: 0 LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %0 = getelementptr i32, ptr %alp, i32 %i.0.reg2mem.0
; CHECK-NEXT: --> {(1020 + %alp),+,-4}<nw><%bb1> U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: --> ((4 * %i.0.reg2mem.0) + %alp) U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %1 = load i32, ptr %0, align 4
; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %2 = getelementptr i32, ptr %lam, i32 %i.0.reg2mem.0
; CHECK-NEXT: --> {(1020 + %lam),+,-4}<nw><%bb1> U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: --> ((4 * %i.0.reg2mem.0) + %lam) U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %indvar.next = add nuw nsw i32 %indvar, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb1> U: [1,257) S: [1,257) Exits: 256 LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: %tmp10 = mul i32 %x, 255
Expand Down
Loading