diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2393ac7182dfd..529c4a1936aaa 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9383,7 +9383,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range, VPlanTransforms::prepareForVectorization( *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, CM.foldTailByMasking(), OrigLoop, - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction())); + getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), + Legal->hasUncountableEarlyExit(), Range); VPlanTransforms::createLoopRegions(*Plan); // Don't use getDecisionAndClampRange here, because we don't know the UF @@ -9584,12 +9585,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range, R->setOperand(1, WideIV->getStepValue()); } - if (auto *UncountableExitingBlock = - Legal->getUncountableEarlyExitingBlock()) { - VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan, - OrigLoop, UncountableExitingBlock, RecipeBuilder, - Range); - } DenseMap IVEndValues; addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); SetVector ExitUsersToFix = @@ -9687,7 +9682,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB); VPlanTransforms::prepareForVectorization( *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop, - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction())); + getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false, + Range); VPlanTransforms::createLoopRegions(*Plan); for (ElementCount VF : Range) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 35e5415a6d4e3..287bc93ce496a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ 
b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -460,11 +460,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } -void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy, - PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, - bool TailFolded, Loop *TheLoop, - DebugLoc IVDL) { +void VPlanTransforms::prepareForVectorization( + VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, + DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) { VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -491,19 +490,33 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy, addCanonicalIVRecipes(Plan, cast(HeaderVPB), cast(LatchVPB), InductionTy, IVDL); - // Disconnect all edges to exit blocks other than from the middle block. - // TODO: VPlans with early exits should be explicitly converted to a form - // exiting only via the latch here, including adjusting the exit condition, - // instead of simply disconnecting the edges and adjusting the VPlan later. - for (VPBlockBase *EB : Plan.getExitBlocks()) { + [[maybe_unused]] bool HandledUncountableEarlyExit = false; + // Disconnect all early exits from the loop leaving it with a single exit from + // the latch. Early exits that are countable are left for a scalar epilog. The + // condition of uncountable early exits (currently at most one is supported) + // is fused into the latch exit, and used to branch from middle block to the + // early exit destination. 
+ for (VPIRBasicBlock *EB : Plan.getExitBlocks()) { for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) { if (Pred == MiddleVPBB) continue; + if (HasUncountableEarlyExit) { + assert(!HandledUncountableEarlyExit && + "can handle exactly one uncountable early exit"); + handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan, + cast<VPBasicBlock>(HeaderVPB), + cast<VPBasicBlock>(LatchVPB), Range); + HandledUncountableEarlyExit = true; + } + cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent(); VPBlockUtils::disconnectBlocks(Pred, EB); } } + assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) && + "missed an uncountable exit that must be handled"); + // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which works also when // vectorizing loops with uncountable early exits. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b10b47cc1282a..806c20ef8cf73 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2461,63 +2461,56 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, } void VPlanTransforms::handleUncountableEarlyExit( - VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock, - VPRecipeBuilder &RecipeBuilder, VFRange &Range) { - VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting()); - VPBuilder Builder(LatchVPBB->getTerminator()); - auto *MiddleVPBB = Plan.getMiddleBlock(); - VPValue *IsEarlyExitTaken = nullptr; - - // Process the uncountable exiting block. Update IsEarlyExitTaken, which - // tracks if the uncountable early exit has been taken. Also split the middle - // block and have it conditionally branch to the early exit block if - // EarlyExitTaken. 
- auto *EarlyExitingBranch = - cast(UncountableExitingBlock->getTerminator()); - BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0); - BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1); - BasicBlock *EarlyExitIRBB = - !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc; - VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB); - - VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask( - OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); - auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond); - IsEarlyExitTaken = - Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); + VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, + VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VFRange &Range) { + using namespace llvm::VPlanPatternMatch; + VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0]; + if (!EarlyExitVPBB->getSinglePredecessor() && + EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) { + assert(EarlyExitVPBB->getNumPredecessors() == 2 && + EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB && + "unsupported early exit VPBB"); + // Early exit operand should always be last phi operand. If EarlyExitVPBB + // has two predecessors and EarlyExitingVPBB is the first, swap the operands + // of the phis. + for (VPRecipeBase &R : EarlyExitVPBB->phis()) + cast(&R)->swapOperands(); + } + + VPBuilder Builder(LatchVPBB->getTerminator()); + VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0]; + assert( + match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) && + "Terminator must be BranchOnCond"); + VPValue *CondOfEarlyExitingVPBB = + EarlyExitingVPBB->getTerminator()->getOperand(0); + auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB + ? CondOfEarlyExitingVPBB + : Builder.createNot(CondOfEarlyExitingVPBB); + + // Split the middle block and have it conditionally branch to the early exit + // block if CondToEarlyExit. 
+ VPValue *IsEarlyExitTaken = + Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit}); VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split"); VPBasicBlock *VectorEarlyExitVPBB = Plan.createVPBasicBlock("vector.early.exit"); - VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle); + VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle); VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB); NewMiddle->swapSuccessors(); - VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock); + VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB); // Update the exit phis in the early exit block. VPBuilder MiddleBuilder(NewMiddle); VPBuilder EarlyExitB(VectorEarlyExitVPBB); - for (VPRecipeBase &R : VPEarlyExitBlock->phis()) { + for (VPRecipeBase &R : EarlyExitVPBB->phis()) { auto *ExitIRI = cast(&R); - // Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has + // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has // a single predecessor and 1 if it has two. unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1; - if (!VPEarlyExitBlock->getSinglePredecessor()) { - // If VPEarlyExitBlock has two predecessors, they are already ordered such - // that early exit is second (and latch exit is first), by construction. - // But its underlying IRBB (EarlyExitIRBB) may have its predecessors - // ordered the other way around, and it is the order of the latter which - // corresponds to the order of operands of VPEarlyExitBlock's phi recipes. - // Therefore, if early exit (UncountableExitingBlock) is the first - // predecessor of EarlyExitIRBB, we swap the operands of phi recipes, - // thereby bringing them to match VPEarlyExitBlock's predecessor order, - // with early exit being last (second). Otherwise they already match. 
- if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) == - UncountableExitingBlock) - ExitIRI->swapOperands(); - + if (ExitIRI->getNumOperands() != 1) { // The first of two operands corresponds to the latch exit, via MiddleVPBB // predecessor. Extract its last lane. ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder); @@ -2533,7 +2526,7 @@ void VPlanTransforms::handleUncountableEarlyExit( LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) { // Update the incoming value from the early exit. VPValue *FirstActiveLane = EarlyExitB.createNaryOp( - VPInstruction::FirstActiveLane, {EarlyExitTakenCond}, nullptr, + VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr, "first.active.lane"); IncomingFromEarlyExit = EarlyExitB.createNaryOp( Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane}, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index cb127d37661c7..d284d916633c8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -69,7 +69,8 @@ struct VPlanTransforms { PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, - DebugLoc IVDL); + DebugLoc IVDL, bool HasUncountableExit, + VFRange &Range); /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's /// flat CFG into a hierarchical CFG. @@ -173,15 +174,16 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. 
static void removeDeadRecipes(VPlan &Plan); - /// Update \p Plan to account for the uncountable early exit block in \p - /// UncountableExitingBlock by - /// * updating the condition exiting the vector loop to include the early - /// exit conditions + /// Update \p Plan to account for the uncountable early exit from \p + /// EarlyExitingVPBB to \p EarlyExitVPBB by + /// * updating the condition exiting the loop via the latch to include the + /// early exit condition, /// * splitting the original middle block to branch to the early exit block - /// if taken. - static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop, - BasicBlock *UncountableExitingBlock, - VPRecipeBuilder &RecipeBuilder, + /// conditionally - according to the early exit condition. + static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, + VPBasicBlock *EarlyExitVPBB, + VPlan &Plan, VPBasicBlock *HeaderVPBB, + VPBasicBlock *LatchVPBB, VFRange &Range); /// Lower abstract recipes to concrete ones, that can be codegen'd. 
Use \p diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index d9bd413bcc186..2a15e907e5fa5 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -13,6 +13,7 @@ #define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H #include "../lib/Transforms/Vectorize/VPlan.h" +#include "../lib/Transforms/Vectorize/VPlanHelpers.h" #include "../lib/Transforms/Vectorize/VPlanTransforms.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test { PredicatedScalarEvolution PSE(*SE, *L); DenseMap VPB2IRBB; auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB); + VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2)); VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64), - PSE, true, false, L, {}); + PSE, true, false, L, {}, false, R); VPlanTransforms::createLoopRegions(*Plan); return Plan; }