diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index fa6b60cba15aa..1dae2c82dae2e 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -236,6 +236,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
     return isFreeToInvert(V, WillInvertAllUses, Unused);
   }
 
+  /// If `Not` is true, returns true if V is a negated power of 2 or zero.
+  /// If `Not` is false, returns true if V is a Mask or zero.
+  bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
+                    unsigned Depth = 0);
+
   /// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
   /// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
   /// NOTE: for Instructions only!
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 73876d00e73a7..d91437e8aec9e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2780,6 +2780,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
     return Res;
 
+  // Canonicalize (sub nuw Mask, X) -> (xor Mask, X)
+  if (I.hasNoUnsignedWrap() &&
+      isMaskOrZero(Op0, /*Not=*/false,
+                   getSimplifyQuery().getWithInstruction(&I)))
+    return BinaryOperator::CreateXor(Op0, Op1);
+
   return TryToNarrowDeduceFlags();
 }
 
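A minimal before/after sketch of the new fold, assuming `opt -passes=instcombine` on a build with this change; the function names are illustrative, not from the patch. `nuw` guarantees `X <= Mask`, so every set bit of `X` lies under the mask and the subtraction never borrows, and borrow-free subtraction is bitwise `xor`:

```llvm
define i32 @src(i32 %x) {
  ; 15 = 0b1111 is a low-bit mask; nuw asserts %x <= 15
  %r = sub nuw i32 15, %x
  ret i32 %r
}

; expected to canonicalize to:
define i32 @tgt(i32 %x) {
  %r = xor i32 %x, 15
  ret i32 %r
}

; The flag is load-bearing: without nuw, %x = 16 gives
; sub i32 15, 16 == -1 but xor i32 16, 15 == 31.
```

The "or zero" part of `isMaskOrZero` is harmless here: `sub nuw 0, X` is poison unless `X == 0`, and `xor 0, 0` agrees on that one input.
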
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2e45725759949..bc3db5b065df4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4326,8 +4326,8 @@ Instruction *InstCombinerImpl::foldSelectICmp(CmpPredicate Pred, SelectInst *SI,
 }
 
 // Returns whether V is a Mask ((X + 1) & X == 0) or ~Mask (-Pow2OrZero)
-static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
-                         unsigned Depth = 0) {
+bool InstCombiner::isMaskOrZero(const Value *V, bool Not,
+                                const SimplifyQuery &Q, unsigned Depth) {
   if (Not ? match(V, m_NegatedPower2OrZero()) : match(V, m_LowBitMaskOrZero()))
     return true;
   if (V->getType()->getScalarSizeInBits() == 1)
@@ -4381,14 +4381,14 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
   case Instruction::Add:
     // Pow2 - 1 is a Mask.
     if (!Not && match(I->getOperand(1), m_AllOnes()))
-      return isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
-                                    Depth, Q.AC, Q.CxtI, Q.DT);
+      return ::isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
+                                      Depth, Q.AC, Q.CxtI, Q.DT);
     break;
   case Instruction::Sub:
     // -Pow2 is a ~Mask.
     if (Not && match(I->getOperand(0), m_Zero()))
-      return isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
-                                    Depth, Q.AC, Q.CxtI, Q.DT);
+      return ::isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
+                                      Depth, Q.AC, Q.CxtI, Q.DT);
     break;
   case Instruction::Call: {
     if (auto *II = dyn_cast<IntrinsicInst>(I)) {
@@ -4497,13 +4497,13 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
 
   if (match(Op0, m_c_And(m_Specific(Op1), m_Value(M)))) {
     X = Op1;
     // Look for: x & Mask pred x
-    if (isMaskOrZero(M, /*Not=*/false, Q)) {
+    if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
       return !ICmpInst::isSigned(Pred) ||
              (match(M, m_NonNegative()) || isKnownNonNegative(M, Q));
     }
     // Look for: x & ~Mask pred ~Mask
-    if (isMaskOrZero(X, /*Not=*/true, Q)) {
+    if (IC.isMaskOrZero(X, /*Not=*/true, Q)) {
       return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, Q);
     }
     return false;
@@ -4513,7 +4513,7 @@
 
     auto Check = [&]() {
       // Look for: ~x | Mask == -1
-      if (isMaskOrZero(M, /*Not=*/false, Q)) {
+      if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
         if (Value *NotX =
                 IC.getFreelyInverted(X, X->hasOneUse(), &IC.Builder)) {
           X = NotX;
@@ -4531,7 +4531,7 @@
       match(Op0, m_OneUse(m_And(m_Value(X), m_Value(M))))) {
     auto Check = [&]() {
       // Look for: x & ~Mask == 0
-      if (isMaskOrZero(M, /*Not=*/true, Q)) {
+      if (IC.isMaskOrZero(M, /*Not=*/true, Q)) {
         if (Value *NotM =
                 IC.getFreelyInverted(M, M->hasOneUse(), &IC.Builder)) {
           M = NotM;
diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll
index a4135e0b51453..180de6d2f8828 100644
--- a/llvm/test/Transforms/InstCombine/sub-xor.ll
+++ b/llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -166,7 +166,7 @@ define i32 @xor_dominating_cond(i32 %x) {
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[X:%.*]], 256
 ; CHECK-NEXT:    br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[A:%.*]] = sub nuw nsw i32 255, [[X]]
+; CHECK-NEXT:    [[A:%.*]] = xor i32 [[X]], 255
 ; CHECK-NEXT:    ret i32 [[A]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret i32 [[X]]
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index e89419d1f3838..ebda52a135495 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -2204,9 +2204,9 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) {
 
 define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) {
 ; CHECK-LABEL: @shrink_sub_from_constant_lowbits3(
-; CHECK-NEXT:    [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 4)
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 24), [[X0000]]
-; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i8> [[SUB]], splat (i8 3)
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[SUB:%.*]] = and <2 x i8> [[TMP1]], splat (i8 30)
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[SUB]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x0000 = shl <2 x i8> %x, <i8 4, i8 4> ; 4 low bits are known zero
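For orientation, a hedged sketch (not from the patch) of the value shapes the now-member `isMaskOrZero` accepts through the `Instruction::Add` and `Instruction::Sub` cases above; `%n` is an arbitrary shift amount and `@mask_shapes` is a hypothetical name:

```llvm
define void @mask_shapes(i32 %n, ptr %sink) {
  %p2 = shl i32 1, %n       ; known power of two (poison for %n >= 32)
  %mask = add i32 %p2, -1   ; Add case: pow2 - 1 is a Mask
  %nmask = sub i32 0, %p2   ; Sub case: 0 - pow2 is a ~Mask
  store volatile i32 %mask, ptr %sink
  store volatile i32 %nmask, ptr %sink
  ret void
}
```
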
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
index a240dfe7d271a..8d0e340b77a10 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
@@ -215,7 +215,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {
 
 define <3 x i8> @shuf_sub(<3 x i8> %x) {
 ; CHECK-LABEL: @shuf_sub(
-; CHECK-NEXT:    [[BO:%.*]] = sub nuw <3 x i8> , [[X:%.*]]
+; CHECK-NEXT:    [[BO:%.*]] = xor <3 x i8> [[X:%.*]], 
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> 
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index ee7ef9955e643..f9f7017f1bd70 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -218,7 +218,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {
 
 define <3 x i8> @shuf_sub(<3 x i8> %x) {
 ; CHECK-LABEL: @shuf_sub(
-; CHECK-NEXT:    [[BO:%.*]] = sub nuw <3 x i8> , [[X:%.*]]
+; CHECK-NEXT:    [[BO:%.*]] = xor <3 x i8> [[X:%.*]], 
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> 
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
index b3625094f07ea..a6c66ff40d8ac 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
@@ -8,18 +8,19 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
 ; CHECK-LABEL: define void @foo(
 ; CHECK-SAME: ptr noalias nocapture noundef readonly [[TMP0:%.*]], ptr noalias nocapture noundef writeonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  vector.ph:
-; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28
 ; CHECK-NEXT:    br label [[TMP4:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[TMP2]] ], [ [[VEC_IND_NEXT:%.*]], [[TMP4]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i64> [[VEC_IND]], splat (i64 4294967295)
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i64> [[TMP6]], splat (i64 255)
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], <8 x i64> [[TMP3]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = tail call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison)
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5)
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP5]], ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[TMP4]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
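The pr88239.ll update above records the known cost of this canonicalization at this phase-ordering point: while the reversed index was a `sub`, SCEV proved the eight loads consecutive (in reverse order) and the vectorizer emitted one wide load plus a reverse shuffle; as an `xor`, the index is no longer affine, so the vectorizer falls back to a masked gather. A hedged sketch of the two spellings of the same index value (illustrative function names, not from the test):

```llvm
define i64 @rev_idx_sub(i64 %i) {
  ; affine in %i: SCEV models this as an AddRec when %i is an IV
  %r = sub nuw nsw i64 255, %i
  ret i64 %r
}

define i64 @rev_idx_xor(i64 %i) {
  ; identical value for 0 <= %i <= 255, but opaque to SCEV
  %r = xor i64 %i, 255
  ret i64 %r
}
```
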
diff --git a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
index 4a19940b6f874..181669165220e 100644
--- a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
+++ b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
@@ -141,14 +141,14 @@ define i32 @test_loop_idiom_recogize(i32 %x, i32 %y, ptr %lam, ptr %alp) nounwin
 ; CHECK-NEXT: Classifying expressions for: @test_loop_idiom_recogize
 ; CHECK-NEXT:   %indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
 ; CHECK-NEXT:   -->  {0,+,1}<%bb1> U: [0,256) S: [0,256) Exits: 255 LoopDispositions: { %bb1: Computable }
-; CHECK-NEXT:   %i.0.reg2mem.0 = sub nuw nsw i32 255, %indvar
-; CHECK-NEXT:   -->  {255,+,-1}<%bb1> U: [0,256) S: [0,256) Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:   %i.0.reg2mem.0 = xor i32 %indvar, 255
+; CHECK-NEXT:   -->  %i.0.reg2mem.0 U: [0,-2147483648) S: [0,-2147483648) Exits: 0 LoopDispositions: { %bb1: Variant }
 ; CHECK-NEXT:   %0 = getelementptr i32, ptr %alp, i32 %i.0.reg2mem.0
-; CHECK-NEXT:   -->  {(1020 + %alp),+,-4}<%bb1> U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:   -->  ((4 * %i.0.reg2mem.0) + %alp) U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Variant }
 ; CHECK-NEXT:   %1 = load i32, ptr %0, align 4
 ; CHECK-NEXT:   -->  %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb1: Variant }
 ; CHECK-NEXT:   %2 = getelementptr i32, ptr %lam, i32 %i.0.reg2mem.0
-; CHECK-NEXT:   -->  {(1020 + %lam),+,-4}<%bb1> U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:   -->  ((4 * %i.0.reg2mem.0) + %lam) U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Variant }
 ; CHECK-NEXT:   %indvar.next = add nuw nsw i32 %indvar, 1
 ; CHECK-NEXT:   -->  {1,+,1}<%bb1> U: [1,257) S: [1,257) Exits: 256 LoopDispositions: { %bb1: Computable }
 ; CHECK-NEXT:   %tmp10 = mul i32 %x, 255
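The scev-custom-dl.ll diff shows the same effect directly in the analysis output: the `{255,+,-1}` recurrence and the GEP AddRecs derived from it degrade to loop-variant unknowns, and the unsigned range widens from [0,256) to the generic non-negative [0,-2147483648). A minimal loop to reproduce the new output, assuming `opt -passes='print<scalar-evolution>' -disable-output` (names illustrative):

```llvm
define void @scev_view(ptr %alp) {
bb1.thread:
  br label %bb1

bb1:
  %indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
  ; classified as a plain SCEVUnknown rather than an AddRec
  %i = xor i32 %indvar, 255
  %gep = getelementptr i32, ptr %alp, i32 %i
  store i32 0, ptr %gep, align 4
  %indvar.next = add nuw nsw i32 %indvar, 1
  %exitcond = icmp eq i32 %indvar.next, 256
  br i1 %exitcond, label %return, label %bb1

return:
  ret void
}
```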