diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4f34500c1c0e8..54f6d1e42f504 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9373,11 +9373,8 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, } } -// Collect VPIRInstructions for phis in the exit blocks that are modeled -// in VPlan and add the exiting VPValue as operand. -static SetVector -collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, - VPlan &Plan) { +// Collect VPIRInstructions for phis in the exit block from the latch only. +static SetVector collectUsersInLatchExitBlock(VPlan &Plan) { SetVector ExitUsersToFix; for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { // Nothing to do for unreachable exit blocks. @@ -9393,11 +9390,8 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, continue; } - PHINode &ExitPhi = ExitIRI->getIRPhi(); - BasicBlock *ExitingBB = OrigLoop->getLoopLatch(); - Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB); - VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); - ExitIRI->addOperand(V); + assert(ExitIRI->getNumOperands() == 1 && "must have a single operand"); + VPValue *V = ExitIRI->getOperand(0); if (V->isLiveIn()) continue; assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() && @@ -9426,7 +9420,7 @@ addUsersInExitBlocks(VPlan &Plan, ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB && "exit values from early exits must be fixed when branch to " "early-exit is added"); - ExitIRI->extractLastLaneOfOperand(B); + ExitIRI->extractLastLaneOfFirstOperand(B); } } @@ -9767,7 +9761,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { DenseMap IVEndValues; addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); SetVector ExitUsersToFix = - collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan); + collectUsersInLatchExitBlock(*Plan); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); addUsersInExitBlocks(*Plan, ExitUsersToFix); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0f2aac146e7a6..b662aa0331d67 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1161,10 +1161,10 @@ class VPIRInstruction : public VPRecipeBase { return true; } - /// Update the recipes single operand to the last lane of the operand using \p - /// Builder. Must only be used for single operand VPIRInstructions wrapping a - /// PHINode. - void extractLastLaneOfOperand(VPBuilder &Builder); + /// Update the recipes first operand to the last lane of the operand using \p + /// Builder. Must only be used for VPIRInstructions with at least one operand + /// wrapping a PHINode. + void extractLastLaneOfFirstOperand(VPBuilder &Builder); }; /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 9fcccfcf8117f..b187fae4abcc6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -352,6 +352,23 @@ std::unique_ptr PlainCFGBuilder::buildPlainCFG( Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader())); Plan->getEntry()->setPlan(&*Plan); + // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the + // VPIRInstructions wrapping them. + // // Note that the operand order corresponds to IR predecessor order, and may + // need adjusting when VPlan predecessors are added, if an exit block has + // multiple predecessor. + for (auto *EB : Plan->getExitBlocks()) { + for (VPRecipeBase &R : EB->phis()) { + auto *PhiR = cast(&R); + PHINode &Phi = PhiR->getIRPhi(); + assert(PhiR->getNumOperands() == 0 && + "no phi operands should be added yet"); + for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock())) + PhiR->addOperand( + getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred))); + } + } + for (const auto &[IRBB, VPB] : BB2VPBB) VPB2IRBB[VPB] = IRBB; @@ -462,19 +479,28 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph"); VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader()); - if (!RequiresScalarEpilogueCheck) { - VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); - return; - } // If needed, add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. Three cases: - // 1) If (N - N%VF) == N, then we *don't* need to run the remainder. + // 1) If we require a scalar epilogue, there is no conditional branch as + // we unconditionally branch to the scalar preheader. Remove the recipes + // from the exit blocks. + // 2) If (N - N%VF) == N, then we *don't* need to run the remainder. // Thus if tail is to be folded, we know we don't need to run the // remainder and we can set the condition to true. - // 2) If we require a scalar epilogue, there is no conditional branch as - // we unconditionally branch to the scalar preheader. Do nothing. // 3) Otherwise, construct a runtime check. + + if (!RequiresScalarEpilogueCheck) { + VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + // The exit blocks are unreachable, remove their recipes to make sure no + // users remain that may pessimize transforms. + for (auto *EB : Plan.getExitBlocks()) { + for (VPRecipeBase &R : make_early_inc_range(*EB)) + R.eraseFromParent(); + } + return; + } + BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); auto *VPExitBlock = Plan.getExitBlock(IRExitBlock); // The connection order corresponds to the operands of the conditional branch. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index a342d5182c974..621012f235a8d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1137,10 +1137,10 @@ InstructionCost VPIRInstruction::computeCost(ElementCount VF, return 0; } -void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) { +void VPIRInstruction::extractLastLaneOfFirstOperand(VPBuilder &Builder) { assert(isa(getInstruction()) && - "can only add exiting operands to phi nodes"); - assert(getNumOperands() == 1 && "must have a single operand"); + "can only update exiting operands to phi nodes"); + assert(getNumOperands() > 0 && "must have at least one operand"); VPValue *Exiting = getOperand(0); if (!Exiting->isLiveIn()) { LLVMContext &Ctx = getInstruction().getContext(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f2dc68b2ea8b6..ee33a58404818 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2505,20 +2505,29 @@ void VPlanTransforms::handleUncountableEarlyExit( VPBuilder EarlyExitB(VectorEarlyExitVPBB); for (VPRecipeBase &R : VPEarlyExitBlock->phis()) { auto *ExitIRI = cast(&R); - PHINode &ExitPhi = ExitIRI->getIRPhi(); - VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn( - ExitPhi.getIncomingValueForBlock(UncountableExitingBlock)); - + // Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has + // a single predecessor and 1 if it has two. + unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1; if (OrigLoop->getUniqueExitBlock()) { - // If there's a unique exit block, VPEarlyExitBlock has 2 predecessors - // (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB - // which is coming from the original latch. - VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn( - ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch())); - ExitIRI->addOperand(IncomingFromLatch); - ExitIRI->extractLastLaneOfOperand(MiddleBuilder); + // If VPEarlyExitBlock has two predecessors, they are already ordered such + // that early exit is second (and latch exit is first), by construction. + // But its underlying IRBB (EarlyExitIRBB) may have its predecessors + // ordered the other way around, and it is the order of the latter which + // corresponds to the order of operands of VPEarlyExitBlock's phi recipes. + // Therefore, if early exit (UncountableExitingBlock) is the first + // predecessor of EarlyExitIRBB, we swap the operands of phi recipes, + // thereby bringing them to match VPEarlyExitBlock's predecessor order, + // with early exit being last (second). Otherwise they already match. + if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) == + UncountableExitingBlock) + ExitIRI->swapOperands(); + + // The first of two operands corresponds to the latch exit, via MiddleVPBB + // predecessor. Extract its last lane. + ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder); } + VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx); auto IsVector = [](ElementCount VF) { return VF.isVector(); }; // When the VFs are vectors, need to add `extract` to get the incoming value // from early exit. When the range contains scalar VF, limit the range to @@ -2526,14 +2535,15 @@ void VPlanTransforms::handleUncountableEarlyExit( // and vector VFs. if (!IncomingFromEarlyExit->isLiveIn() && LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) { + // Update the incoming value from the early exit. VPValue *FirstActiveLane = EarlyExitB.createNaryOp( VPInstruction::FirstActiveLane, {EarlyExitTakenCond}, nullptr, "first.active.lane"); IncomingFromEarlyExit = EarlyExitB.createNaryOp( Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane}, nullptr, "early.exit.value"); + ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit); } - ExitIRI->addOperand(IncomingFromEarlyExit); } MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});