diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8c8cc0859e4af..72616a85baf15 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3590,6 +3590,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
+  case Intrinsic::experimental_vp_reverse: {
+    Value *X;
+    Value *Vec = II->getArgOperand(0);
+    Value *Mask = II->getArgOperand(1);
+    if (!match(Mask, m_AllOnes()))
+      break;
+    Value *EVL = II->getArgOperand(2);
+    // rev(unop rev(X)) --> unop X
+    if (match(Vec,
+              m_OneUse(m_UnOp(m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+                  m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
+      auto *OldUnOp = cast<UnaryOperator>(Vec);
+      auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
+          OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
+          II->getIterator());
+      return replaceInstUsesWith(CI, NewUnOp);
+    }
+    break;
+  }
   case Intrinsic::vector_reduce_or:
   case Intrinsic::vector_reduce_and: {
     // Canonicalize logical or/and reductions:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4fe900e9421f8..b51e059032b6c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2231,6 +2231,39 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
     return createBinOpReverse(LHS, V2);
 
+  auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
+    Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
+    if (auto *BO = dyn_cast<BinaryOperator>(V))
+      BO->copyIRFlags(&Inst);
+
+    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
+    Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue());
+    Module *M = Inst.getModule();
+    Function *F = Intrinsic::getOrInsertDeclaration(
+        M, Intrinsic::experimental_vp_reverse, V->getType());
+    return CallInst::Create(F, {V, AllTrueMask, EVL});
+  };
+
+  Value *EVL;
+  if (match(LHS, m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+                     m_Value(V1), m_AllOnes(), m_Value(EVL)))) {
+    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
+    if (match(RHS, m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+                       m_Value(V2), m_AllOnes(), m_Specific(EVL))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse() ||
+         (LHS == RHS && LHS->hasNUses(2))))
+      return createBinOpVPReverse(V1, V2, EVL);
+
+    // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createBinOpVPReverse(V1, RHS, EVL);
+  }
+  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
+  else if (isSplatValue(LHS) &&
+           match(RHS, m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+                          m_Value(V2), m_AllOnes(), m_Value(EVL))))
+    return createBinOpVPReverse(LHS, V2, EVL);
+
   // It may not be safe to reorder shuffles and things like div, urem, etc.
   // because we may trap when executing those ops on unknown vector elements.
   // See PR20059.
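For reference, a minimal IR-level sketch of the two folds the patch adds. This is illustration only, not part of the patch: the function names @binop_sketch/@unop_sketch and the concrete vector types are made up here and simply mirror the updated tests below, and intrinsic declarations are omitted as in the existing test file. Both folds only fire when the vp.reverse calls involved use an all-true mask and a common EVL:

; Binop fold in foldVectorBinop: Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
define <vscale x 4 x i32> @binop_sketch(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
  %a.rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %b.rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
  ret <vscale x 4 x i32> %add
}
; ... should become roughly:
;   %add = add nsw <vscale x 4 x i32> %a, %b
;   %add.rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
;   ret <vscale x 4 x i32> %add.rev

; Unary fold in visitCallInst: rev(unop(rev(X))) -> unop(X)
define <vscale x 4 x float> @unop_sketch(<vscale x 4 x float> %x, i32 %evl) {
  %x.rev = call <vscale x 4 x float> @llvm.experimental.vp.reverse(<vscale x 4 x float> %x, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %neg = fneg <vscale x 4 x float> %x.rev
  %neg.rev = call <vscale x 4 x float> @llvm.experimental.vp.reverse(<vscale x 4 x float> %neg, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x float> %neg.rev
}
; ... should become roughly:
;   %neg = fneg <vscale x 4 x float> %x
;   ret <vscale x 4 x float> %neg

An arbitrary mask cannot be handled this way, since the mask itself would also have to be reversed between the inner and outer operations (see the renamed binop_reverse_elim_samemask negative test below).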
diff --git a/llvm/test/Transforms/InstCombine/vp-reverse.ll b/llvm/test/Transforms/InstCombine/vp-reverse.ll
index 79e6c47bdf1b2..540b57da3475b 100644
--- a/llvm/test/Transforms/InstCombine/vp-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vp-reverse.ll
@@ -3,11 +3,8 @@
 
 define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_elim(
-; CHECK-NEXT:    [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[B:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], [[B]]
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -16,8 +13,10 @@ define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4
   ret <vscale x 4 x i32> %add.rev
 }
 
-define <vscale x 4 x i32> @binop_reverse_elim2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
-; CHECK-LABEL: @binop_reverse_elim2(
+; Negative test - the mask needs to be reversed between the inner and
+; the outer to be correct.
+define <vscale x 4 x i32> @binop_reverse_elim_samemask(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
+; CHECK-LABEL: @binop_reverse_elim_samemask(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M:%.*]], i32 [[EVL:%.*]])
 ; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
@@ -48,10 +47,9 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffmask(<vscale x 4 x i32> %a, <v
 
 define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_elim_diffevl(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> splat (i1 true), i32 10)
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
+; CHECK-NEXT:    [[ADD1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 10)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -63,10 +61,8 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vs
 
 define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_splat_elim(
-; CHECK-NEXT:    [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %add = add nsw <vscale x 4 x i32> %a.rev, splat (i32 22)
@@ -76,10 +72,8 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %
 
 define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_splat_elim2(
-; CHECK-NEXT:    [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %add = add nsw <vscale x 4 x i32> splat (i32 22), %a.rev
@@ -87,12 +81,40 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32
   ret <vscale x 4 x i32> %add.rev
 }
 
+define <vscale x 4 x i32> @binop_reverse_splat_elim3(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
+; CHECK-LABEL: @binop_reverse_splat_elim3(
+; CHECK-NEXT:    [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[B_VEC]], [[A_REV:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  %add = add nsw <vscale x 4 x i32> %b.vec, %a.rev
+  %add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 4 x i32> %add.rev
+}
+
+define <vscale x 4 x i32> @binop_reverse_splat_elim4(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
+; CHECK-LABEL: @binop_reverse_splat_elim4(
+; CHECK-NEXT:    [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B_VEC]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
+;
+  %b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  %add = add nsw <vscale x 4 x i32> %a.rev, %b.vec
+  %add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 4 x i32> %add.rev
+}
+
 define <vscale x 4 x float> @unop_reverse_splat_elim(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 %evl) {
 ; CHECK-LABEL: @unop_reverse_splat_elim(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV]]
-; CHECK-NEXT:    [[OP_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[OP]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[OP_REV]]
+; CHECK-NEXT:    [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[OP]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %op = fneg <vscale x 4 x float> %a.rev