diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index bab2c6efd4035..6e183b0abe45e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3058,11 +3058,9 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { PHINode *NewPhi = cast(State.get(VPPhi)); // Make sure the builder has a valid insert point. Builder.SetInsertPoint(NewPhi); - for (unsigned Idx = 0; Idx < VPPhi->getNumOperands(); ++Idx) { - VPValue *Inc = VPPhi->getIncomingValue(Idx); - VPBasicBlock *VPBB = VPPhi->getIncomingBlock(Idx); + + for (const auto &[Inc, VPBB] : VPPhi->incoming_values_and_blocks()) NewPhi->addIncoming(State.get(Inc), State.CFG.VPBB2IRBB[VPBB]); - } } } } @@ -9074,14 +9072,14 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, VPValue *OneVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) { - auto *ScalarPhiIRI = cast(&ScalarPhiR); - auto *ScalarPhiI = dyn_cast(&ScalarPhiIRI->getInstruction()); - if (!ScalarPhiI) + auto *ScalarPhiIRI = dyn_cast(&ScalarPhiR); + if (!ScalarPhiIRI) break; // TODO: Extract final value from induction recipe initially, optimize to // pre-computed end value together in optimizeInductionExitUsers. - auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); + auto *VectorPhiR = + cast(Builder.getRecipe(&ScalarPhiIRI->getIRPhi())); if (auto *WideIVR = dyn_cast(VectorPhiR)) { if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, @@ -9131,11 +9129,8 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, continue; for (VPRecipeBase &R : *ExitVPBB) { - auto *ExitIRI = dyn_cast(&R); + auto *ExitIRI = dyn_cast(&R); if (!ExitIRI) - continue; - auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); - if (!ExitPhi) break; if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock()) { assert(ExitIRI->getNumOperands() == @@ -9143,8 +9138,10 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, "early-exit must update exit values on construction"); continue; } + + PHINode &ExitPhi = ExitIRI->getIRPhi(); BasicBlock *ExitingBB = OrigLoop->getLoopLatch(); - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); + Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB); VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); ExitIRI->addOperand(V); if (V->isLiveIn()) @@ -10325,11 +10322,10 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { cast(R.getVPSingleValue()->getUnderlyingValue())); } for (VPRecipeBase &R : make_early_inc_range(*MainPlan.getScalarHeader())) { - auto *VPIRInst = cast(&R); - auto *IRI = dyn_cast(&VPIRInst->getInstruction()); - if (!IRI) + auto *VPIRInst = dyn_cast(&R); + if (!VPIRInst) break; - if (EpiWidenedPhis.contains(IRI)) + if (EpiWidenedPhis.contains(&VPIRInst->getIRPhi())) continue; // There is no corresponding wide induction in the epilogue plan that would // need a resume value. Remove the VPIRInst wrapping the scalar header phi diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index e595347d62bf5..871203a6732ab 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1227,7 +1227,7 @@ VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) { auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB); for (Instruction &I : make_range(IRBB->begin(), IRBB->getTerminator()->getIterator())) - VPIRBB->appendRecipe(new VPIRInstruction(I)); + VPIRBB->appendRecipe(VPIRInstruction::create(I)); return VPIRBB; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 43dc30c40bb53..3246e5e9f315d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1033,16 +1033,19 @@ class VPInstruction : public VPRecipeWithIRFlags, class VPIRInstruction : public VPRecipeBase { Instruction &I; -public: +protected: VPIRInstruction(Instruction &I) : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef()), I(I) {} +public: ~VPIRInstruction() override = default; + static VPIRInstruction *create(Instruction &I); + VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC) VPIRInstruction *clone() override { - auto *R = new VPIRInstruction(I); + auto *R = create(I); for (auto *Op : operands()) R->addOperand(Op); return R; @@ -1086,6 +1089,83 @@ class VPIRInstruction : public VPRecipeBase { void extractLastLaneOfOperand(VPBuilder &Builder); }; +/// Helper type to provide functions to access incoming values and blocks for +/// phi-like recipes. RecipeTy must be a sub-class of VPRecipeBase. +template class VPPhiAccessors { + /// Return a VPRecipeBase* to the current object. + const VPRecipeBase *getAsRecipe() const { + return static_cast(this); + } + +public: + /// Returns the \p I th incoming VPValue. + VPValue *getIncomingValue(unsigned I) const { + return getAsRecipe()->getOperand(I); + } + + /// Returns an interator range over the incoming values + VPUser::const_operand_range incoming_values() const { + return getAsRecipe()->operands(); + } + + /// Returns the \p I th incoming block. + const VPBasicBlock *getIncomingBlock(unsigned Idx) const; + + using const_incoming_block_iterator = + mapped_iterator>; + using const_incoming_blocks_range = + iterator_range; + + const_incoming_block_iterator incoming_block_begin() const { + return const_incoming_block_iterator( + detail::index_iterator(0), + [this](size_t Idx) { return getIncomingBlock(Idx); }); + } + const_incoming_block_iterator incoming_block_end() const { + return const_incoming_block_iterator( + detail::index_iterator(getAsRecipe()->getVPDefID() == + VPDef::VPWidenIntOrFpInductionSC + ? 2 + : getAsRecipe()->getNumOperands()), + [this](size_t Idx) { return getIncomingBlock(Idx); }); + } + + /// Returns an iterator range over the incoming blocks. + const_incoming_blocks_range incoming_blocks() const { + return make_range(incoming_block_begin(), incoming_block_end()); + } + + /// Returns an iterator range over pairs of incoming values and corrsponding + /// incoming blocks. + detail::zippy + incoming_values_and_blocks() const { + return zip(incoming_values(), incoming_blocks()); + } +}; + +/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use +/// cast/dyn_cast/isa and execute() implementation. +struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors { + VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {} + + static inline bool classof(const VPRecipeBase *U) { + auto *R = dyn_cast(U); + return R && isa(R->getInstruction()); + } + + PHINode &getIRPhi() { return cast(getInstruction()); } + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// VPWidenRecipe is a recipe for producing a widened instruction using the /// opcode and operands of the recipe. This recipe covers most of the /// traditional vectorization cases where each recipe transforms into a @@ -1923,7 +2003,8 @@ class VPScalarPHIRecipe : public VPHeaderPHIRecipe { /// recipe is placed in an entry block to a (non-replicate) region, it must have /// exactly 2 incoming values, the first from the predecessor of the region and /// the second from the exiting block of the region. -class VPWidenPHIRecipe : public VPSingleDefRecipe { +class VPWidenPHIRecipe : public VPSingleDefRecipe, + public VPPhiAccessors { /// Name to use for the generated IR instruction for the widened phi. std::string Name; @@ -1954,12 +2035,6 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif - - /// Returns the \p I th incoming VPBasicBlock. - VPBasicBlock *getIncomingBlock(unsigned I); - - /// Returns the \p I th incoming VPValue. - VPValue *getIncomingValue(unsigned I) { return getOperand(I); } }; /// A recipe for handling first-order recurrence phis. The start value is the diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0d3c9aeec6be4..321dc12cb406a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -972,30 +972,15 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, } #endif -void VPIRInstruction::execute(VPTransformState &State) { - assert((isa(&I) || getNumOperands() == 0) && - "Only PHINodes can have extra operands"); - for (const auto &[Idx, Op] : enumerate(operands())) { - VPValue *ExitValue = Op; - auto Lane = vputils::isUniformAfterVectorization(ExitValue) - ? VPLane::getFirstLane() - : VPLane::getLastLaneForVF(State.VF); - VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; - auto *PredVPBB = Pred->getExitingBasicBlock(); - BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; - // Set insertion point in PredBB in case an extract needs to be generated. - // TODO: Model extracts explicitly. - State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); - Value *V = State.get(ExitValue, VPLane(Lane)); - auto *Phi = cast(&I); - // If there is no existing block for PredBB in the phi, add a new incoming - // value. Otherwise update the existing incoming value for PredBB. - if (Phi->getBasicBlockIndex(PredBB) == -1) - Phi->addIncoming(V, PredBB); - else - Phi->setIncomingValueForBlock(PredBB, V); - } +VPIRInstruction *VPIRInstruction ::create(Instruction &I) { + if (auto *Phi = dyn_cast(&I)) + return new VPIRPhi(*Phi); + return new VPIRInstruction(I); +} +void VPIRInstruction::execute(VPTransformState &State) { + assert(!isa(this) && getNumOperands() == 0 && + "PHINodes must be handled by VPIRPhi"); // Advance the insert point after the wrapped IR instruction. This allows // interleaving VPIRInstructions and other recipes. State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator())); @@ -1028,15 +1013,74 @@ void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) { void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "IR " << I; +} +#endif + +static const VPBasicBlock *getIncomingBlockForRecipe(const VPRecipeBase *R, + unsigned I) { + const VPBasicBlock *Parent = R->getParent(); + const VPBlockBase *Pred = nullptr; + if (Parent->getNumPredecessors() > 0) { + Pred = Parent->getPredecessors()[I]; + } else { + auto *Region = Parent->getParent(); + assert(Region && !Region->isReplicator() && Region->getEntry() == Parent && + "must be in the entry block of a non-replicate region"); + assert( + I < 2 && + (R->getNumOperands() == 2 || isa(R)) && + "when placed in an entry block, only 2 incoming blocks are available"); + + // I == 0 selects the predecessor of the region, I == 1 selects the region + // itself whose exiting block feeds the phi across the backedge. + Pred = I == 0 ? Region->getSinglePredecessor() : Region; + } + + return Pred->getExitingBasicBlock(); +} + +template <> +const VPBasicBlock * +VPPhiAccessors::getIncomingBlock(unsigned Idx) const { + return getIncomingBlockForRecipe(getAsRecipe(), Idx); +} + +void VPIRPhi::execute(VPTransformState &State) { + PHINode *Phi = &getIRPhi(); + for (const auto &[ExitValue, IncVPBB] : incoming_values_and_blocks()) { + auto Lane = vputils::isUniformAfterVectorization(ExitValue) + ? VPLane::getFirstLane() + : VPLane::getLastLaneForVF(State.VF); + BasicBlock *PredBB = State.CFG.VPBB2IRBB[IncVPBB]; + // Set insertion point in PredBB in case an extract needs to be generated. + // TODO: Model extracts explicitly. + State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); + Value *V = State.get(ExitValue, VPLane(Lane)); + // If there is no existing block for PredBB in the phi, add a new incoming + // value. Otherwise update the existing incoming value for PredBB. + if (Phi->getBasicBlockIndex(PredBB) == -1) + Phi->addIncoming(V, PredBB); + else + Phi->setIncomingValueForBlock(PredBB, V); + } + + // Advance the insert point after the wrapped IR instruction. This allows + // interleaving VPIRInstructions and other recipes. + State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator())); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPIRPhi::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + VPIRInstruction::print(O, Indent, SlotTracker); if (getNumOperands() != 0) { O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": "; - interleaveComma( - enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { - Op.value()->printAsOperand(O, SlotTracker); - O << " from "; - getParent()->getPredecessors()[Op.index()]->printAsOperand(O); - }); + interleaveComma(incoming_values_and_blocks(), O, [&O, &SlotTracker](auto Op) { + std::get<0>(Op)->printAsOperand(O, SlotTracker); + O << " from "; + std::get<1>(Op)->printAsOperand(O); + }); O << ")"; } } @@ -3589,25 +3633,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -VPBasicBlock *VPWidenPHIRecipe::getIncomingBlock(unsigned I) { - VPBasicBlock *Parent = getParent(); - VPBlockBase *Pred = nullptr; - if (Parent->getNumPredecessors() > 0) { - Pred = Parent->getPredecessors()[I]; - } else { - auto *Region = Parent->getParent(); - assert(Region && !Region->isReplicator() && Region->getEntry() == Parent && - "must be in the entry block of a non-replicate region"); - assert( - I < 2 && getNumOperands() == 2 && - "when placed in an entry block, only 2 incoming blocks are available"); - - // I == 0 selects the predecessor of the region, I == 1 selects the region - // itself whose exiting block feeds the phi across the backedge. - Pred = I == 0 ? Region->getSinglePredecessor() : Region; - } - - return Pred->getExitingBasicBlock(); +template <> +const VPBasicBlock * +VPPhiAccessors::getIncomingBlock(unsigned Idx) const { + return getIncomingBlockForRecipe(getAsRecipe(), Idx); } void VPWidenPHIRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ecf52673480e2..7126d649ec86b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -792,11 +792,11 @@ void VPlanTransforms::optimizeInductionExitUsers( VPlan &Plan, DenseMap &EndValues) { VPBlockBase *MiddleVPBB = Plan.getMiddleBlock(); VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); - for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { - for (VPRecipeBase &R : *ExitVPBB) { - auto *ExitIRI = cast(&R); - if (!isa(ExitIRI->getInstruction())) - break; + VPBuilder B(Plan.getMiddleBlock()->getTerminator()); + for (VPRecipeBase &R : *ExitVPBB) { + auto *ExitIRI = dyn_cast(&R); + if (!ExitIRI) + break; for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) { if (PredVPBB == MiddleVPBB) @@ -2139,20 +2139,20 @@ void VPlanTransforms::handleUncountableEarlyExit( VPBuilder MiddleBuilder(NewMiddle); VPBuilder EarlyExitB(VectorEarlyExitVPBB); for (VPRecipeBase &R : *VPEarlyExitBlock) { - auto *ExitIRI = cast(&R); - auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); - if (!ExitPhi) + auto *ExitIRI = dyn_cast(&R); + if (!ExitIRI) break; + PHINode &ExitPhi = ExitIRI->getIRPhi(); VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn( - ExitPhi->getIncomingValueForBlock(UncountableExitingBlock)); + ExitPhi.getIncomingValueForBlock(UncountableExitingBlock)); if (OrigLoop->getUniqueExitBlock()) { // If there's a unique exit block, VPEarlyExitBlock has 2 predecessors // (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB // which is coming from the original latch. VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn( - ExitPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); + ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch())); ExitIRI->addOperand(IncomingFromLatch); ExitIRI->extractLastLaneOfOperand(MiddleBuilder); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 1b3b69ea6a13d..7ef0b7c816d99 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -205,10 +205,8 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { for (const VPUser *U : V->users()) { auto *UI = cast(U); // TODO: check dominance of incoming values for phis properly. - if (!UI || - isa(UI) || - (isa(UI) && - isa(cast(UI)->getInstruction()))) + if (!UI || isa(UI)) continue; // If the user is in the same block, check it comes after R in the