From 063d70f7c1425147f6fd78077609a4a043f7e7f1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 5 May 2025 20:41:34 +0100 Subject: [PATCH 1/5] [VPlan] Remove ResumePhi opcode, use regular PHI instead (NFC). Use regular VPPhi instead of a separate opcode for resume phis. This removes an unneeded specialized opcode and unifies the code (verification, printing, updating when CFG is changed). Depends on https://github.com/llvm/llvm-project/pull/140132. --- .../Transforms/Vectorize/LoopVectorize.cpp | 34 +++++++----------- llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++--- .../Transforms/Vectorize/VPlanAnalysis.cpp | 1 - .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 36 ++++++++----------- .../Transforms/Vectorize/VPlanTransforms.cpp | 9 +++-- .../Transforms/Vectorize/VPlanVerifier.cpp | 4 +-- .../vplan-printing-before-execute.ll | 8 ++--- .../LoopVectorize/vplan-printing.ll | 26 +++++++------- 8 files changed, 54 insertions(+), 75 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4f305bb383465..f6b1913bcba60 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2381,8 +2381,8 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) { // We just connected a new block to the scalar preheader. Update all // ResumePhis by adding an incoming value for it, replicating the last value. for (VPRecipeBase &R : *cast(ScalarPH)) { - auto *ResumePhi = dyn_cast(&R); - if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi) + auto *ResumePhi = cast(&R); + if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors()) continue; ResumePhi->addOperand( ResumePhi->getOperand(ResumePhi->getNumOperands() - 1)); @@ -2533,7 +2533,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) { static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB); for (auto &R : make_early_inc_range(*VPBB)) { - assert(!R.isPhi() && "Tried to move phi recipe to end of block"); + assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) && + "Tried to move phi recipe to end of block"); R.moveBefore(*IRVPBB, IRVPBB->end()); } @@ -7591,10 +7592,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry // over the incoming values correctly. using namespace VPlanPatternMatch; - auto IsResumePhi = [](VPUser *U) { - auto *VPI = dyn_cast(U); - return VPI && VPI->getOpcode() == VPInstruction::ResumePhi; - }; + auto IsResumePhi = [](VPUser *U) { return isa(U); }; assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 && "ResumePhi must have a single user"); auto *EpiResumePhiVPI = @@ -8776,9 +8774,8 @@ static VPInstruction *addResumePhiRecipeForInduction( WideIV->getDebugLoc()); } - auto *ResumePhiRecipe = - ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start}, - WideIV->getDebugLoc(), "bc.resume.val"); + auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi( + {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val"); return ResumePhiRecipe; } @@ -8807,8 +8804,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, &Plan.getVectorTripCount())) { - assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi && - "Expected a ResumePhi"); + assert(isa(ResumePhi) && "Expected a phi"); IVEndValues[WideIVR] = ResumePhi->getOperand(0); ScalarPhiIRI->addOperand(ResumePhi); continue; @@ -8833,8 +8829,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {}, "vector.recur.extract"); StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx"; - auto *ResumePhiR = ScalarPHBuilder.createNaryOp( - VPInstruction::ResumePhi, + auto *ResumePhiR = ScalarPHBuilder.createScalarPhi( {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name); ScalarPhiIRI->addOperand(ResumePhiR); } @@ -10010,9 +10005,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { VPI->setOperand(1, Freeze); if (UpdateResumePhis) OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) { - return Freeze != &U && isa(&U) && - cast(&U)->getOpcode() == - VPInstruction::ResumePhi; + return Freeze != &U && isa(&U); }); } }; @@ -10025,13 +10018,12 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { // scalar (which will become vector) epilogue loop we are done. Otherwise // create it below. if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) { - return match(&R, m_VPInstruction( - m_Specific(VectorTC), m_SpecificInt(0))); + return match(&R, m_VPInstruction(m_Specific(VectorTC), + m_SpecificInt(0))); })) return; VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); - ScalarPHBuilder.createNaryOp( - VPInstruction::ResumePhi, + ScalarPHBuilder.createScalarPhi( {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {}, "vec.epilog.resume.val"); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fd58b8a8ec758..9646c3183f285 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -887,11 +887,6 @@ class VPInstruction : public VPRecipeWithIRFlags, SLPStore, ActiveLaneMask, ExplicitVectorLength, - /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan. - /// The first operand is the incoming value from the predecessor in VPlan, - /// the second operand is the incoming value for all other predecessors - /// (which are currently not modeled in VPlan). - ResumePhi, CalculateTripCountMinusVF, // Increment the canonical IV separately for each unrolled part. CanonicalIVIncrementForPart, @@ -1137,11 +1132,15 @@ struct VPPhi : public VPInstruction, public VPPhiAccessors { VPPhi(ArrayRef Operands, DebugLoc DL, const Twine &Name = "") : VPInstruction(Instruction::PHI, Operands, DL, Name) {} - static inline bool classof(const VPRecipeBase *U) { + static inline bool classof(const VPUser *U) { auto *R = dyn_cast(U); return R && R->getOpcode() == Instruction::PHI; } + VPPhi *clone() override { + return new VPPhi(operands(), getDebugLoc(), getName()); + } + void execute(VPTransformState &State) override; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index ac0f30cb4693c..926490bfad7d0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -101,7 +101,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { return inferScalarType(R->getOperand(0)); case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::Not: - case VPInstruction::ResumePhi: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::AnyOf: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 18229780bc4a5..a5dbae00e402d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -732,17 +732,6 @@ Value *VPInstruction::generate(VPTransformState &State) { Value *Addend = State.get(getOperand(1), VPLane(0)); return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } - case VPInstruction::ResumePhi: { - auto *NewPhi = - Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name); - for (const auto &[IncVPV, PredVPBB] : - zip(operands(), getParent()->getPredecessors())) { - Value *IncV = State.get(IncVPV, /* IsScalar */ true); - BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(cast(PredVPBB)); - NewPhi->addIncoming(IncV, PredBB); - } - return NewPhi; - } case VPInstruction::AnyOf: { Value *A = State.get(getOperand(0)); return Builder.CreateOrReduce(A); @@ -841,8 +830,7 @@ bool VPInstruction::isVectorToScalar() const { } bool VPInstruction::isSingleScalar() const { - return getOpcode() == VPInstruction::ResumePhi || - getOpcode() == Instruction::PHI; + return getOpcode() == Instruction::PHI; } void VPInstruction::execute(VPTransformState &State) { @@ -928,7 +916,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::BranchOnCount: case VPInstruction::BranchOnCond: - case VPInstruction::ResumePhi: return true; case VPInstruction::PtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); @@ -985,9 +972,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; - case VPInstruction::ResumePhi: - O << "resume-phi"; - break; case VPInstruction::ExplicitVectorLength: O << "EXPLICIT-VECTOR-LENGTH"; break; @@ -1095,11 +1079,19 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent, void VPPhi::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); - BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0)); - Value *Start = State.get(getIncomingValue(0), VPLane(0)); - PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName()); - Phi->addIncoming(Start, VectorPH); - State.set(this, Phi, VPLane(0)); + PHINode *NewPhi = State.Builder.CreatePHI( + State.TypeAnalysis.inferScalarType(this), 2, getName()); + // TODO: Fixup incoming values once other recipes are enabled. + unsigned NumIncoming = + getParent() == getParent()->getPlan()->getScalarPreheader() + ? getNumIncoming() + : 1; + for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) { + Value *IncV = State.get(getIncomingValue(Idx), VPLane(0)); + BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx)); + NewPhi->addIncoming(IncV, PredBB); + } + State.set(this, NewPhi, VPLane(0)); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index dc5be520505eb..9d7d451bb5741 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1863,8 +1863,8 @@ static void removeBranchOnCondTrue(VPlan &Plan) { !isa(cast(&R)->getInstruction())) && !isa(&R) && "Cannot update VPIRInstructions wrapping phis or header phis yet"); - auto *VPI = dyn_cast(&R); - if (!VPI || VPI->getOpcode() != VPInstruction::ResumePhi) + auto *VPI = dyn_cast(&R); + if (!VPI) break; VPBuilder B(VPI); SmallVector NewOperands; @@ -1874,9 +1874,8 @@ static void removeBranchOnCondTrue(VPlan &Plan) { continue; NewOperands.push_back(Op); } - VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi, - NewOperands, VPI->getDebugLoc(), - VPI->getName())); + VPI->replaceAllUsesWith( + B.createScalarPhi(NewOperands, VPI->getDebugLoc(), VPI->getName())); VPI->eraseFromParent(); } // Disconnect blocks and remove the terminator. RemovedSucc will be deleted diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 54cf8ac2ed04a..45010d0021581 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -243,9 +243,7 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { continue; } // TODO: Also verify VPPredInstPHIRecipe. - if (isa(UI) || - (isa(UI) && (cast(UI)->getOpcode() == - VPInstruction::ResumePhi))) + if (isa(UI)) continue; // If the user is in the same block, check it comes after R in the diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 3816def9e13c2..5dcb7d214d359 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -49,8 +49,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and> -; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A> +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -99,8 +99,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and> -; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A> +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 3504558d1b545..9219c8bfba681 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -46,7 +46,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -117,7 +117,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -206,7 +206,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -289,7 +289,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -394,7 +394,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -479,7 +479,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -548,7 +548,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -616,7 +616,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -686,7 +686,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -776,7 +776,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -855,7 +855,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -972,8 +972,8 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = phi [ vp<[[RESUME_1]]>, middle.block ], [ ir<22>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: From 74eddb586e3bec545515544b61a952e3e38963e5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 28 May 2025 16:27:26 +0100 Subject: [PATCH 2/5] !fixup address latest comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 6 ++--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 23 ++++++++++------ .../vplan-printing-before-execute.ll | 8 +++--- .../LoopVectorize/vplan-printing.ll | 26 +++++++++---------- 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c9263c4cd6ed8..eadaa687fb009 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2380,10 +2380,8 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) { // We just connected a new block to the scalar preheader. Update all // ResumePhis by adding an incoming value for it, replicating the last value. - for (VPRecipeBase &R : *cast(ScalarPH)) { + for (VPRecipeBase &R : cast(ScalarPH)->phis()) { auto *ResumePhi = cast(&R); - if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors()) - continue; ResumePhi->addOperand( ResumePhi->getOperand(ResumePhi->getNumOperands() - 1)); } @@ -2534,7 +2532,7 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB); for (auto &R : make_early_inc_range(*VPBB)) { assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) && - "Tried to move phi recipe to end of block"); + "Tried to move phi recipe after a non-phi recipe"); R.moveBefore(*IRVPBB, IRVPBB->end()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 3464e833710b7..8a7dde5eddebb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1081,11 +1081,12 @@ void VPPhi::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); PHINode *NewPhi = State.Builder.CreatePHI( State.TypeAnalysis.inferScalarType(this), 2, getName()); - // TODO: Fixup incoming values once other recipes are enabled. - unsigned NumIncoming = - getParent() == getParent()->getPlan()->getScalarPreheader() - ? getNumIncoming() - : 1; + unsigned NumIncoming = getNumIncoming(); + if (getParent() != getParent()->getPlan()->getScalarPreheader()) { + // TODO: Fixup all incoming values of header phis once recipes defining them + // are introduced. + NumIncoming = 1; + } for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) { Value *IncV = State.get(getIncomingValue(Idx), VPLane(0)); BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx)); @@ -1099,9 +1100,15 @@ void VPPhi::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "EMIT "; printAsOperand(O, SlotTracker); - O << " = phi "; - - printPhiOperands(O, SlotTracker); + const VPBasicBlock *Parent = getParent(); + if (Parent == Parent->getPlan()->getScalarPreheader()) { + // TODO: Use regular printing for resume-phis as well + O << " = resume-phi "; + printOperands(O, SlotTracker); + } else { + O << " = phi "; + printPhiOperands(O, SlotTracker); + } } #endif diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 76052886ea7af..b487911379f30 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -49,8 +49,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb ] -; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -98,8 +98,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb ] -; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 9219c8bfba681..3504558d1b545 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -46,7 +46,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -117,7 +117,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -206,7 +206,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -289,7 +289,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -394,7 +394,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -479,7 +479,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -548,7 +548,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -616,7 +616,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -686,7 +686,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -776,7 +776,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -855,7 +855,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -972,8 +972,8 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = phi [ vp<[[RESUME_1]]>, middle.block ], [ ir<22>, ir-bb ] -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: From 801b98dd57d1bf8fb3e95d8db421ce830e5e2a55 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 28 May 2025 16:36:18 +0100 Subject: [PATCH 3/5] !fixup update comment --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index eadaa687fb009..10e888e694e11 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2379,7 +2379,7 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) { PreVectorPH->swapSuccessors(); // We just connected a new block to the scalar preheader. Update all - // ResumePhis by adding an incoming value for it, replicating the last value. + // VPPhis by adding an incoming value for it, replicating the last value. for (VPRecipeBase &R : cast(ScalarPH)->phis()) { auto *ResumePhi = cast(&R); ResumePhi->addOperand( From d3df2c398c0d729198f1c0fa82df1a424f80a79f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 29 May 2025 11:03:01 +0100 Subject: [PATCH 4/5] !fixup update after merging 5b85e4b08d2e356c40f5fbe8da98f94467c2f0cb. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 267bf88e72b69..5565c4b8aac1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1104,7 +1104,7 @@ void VPPhi::execute(VPTransformState &State) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPPhi::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent << "EMIT "; + O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " "; printAsOperand(O, SlotTracker); const VPBasicBlock *Parent = getParent(); if (Parent == Parent->getPlan()->getScalarPreheader()) { From 0b399267741d08853e5137c707be427065891f7a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 30 May 2025 11:16:39 +0100 Subject: [PATCH 5/5] !fixup use IsaPred. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 ++--- llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 657cc334ab4a0..ecdba9d1a1032 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7534,11 +7534,10 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry // over the incoming values correctly. using namespace VPlanPatternMatch; - auto IsResumePhi = [](VPUser *U) { return isa(U); }; - assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 && + assert(count_if(EpiRedResult->users(), IsaPred) == 1 && "ResumePhi must have a single user"); auto *EpiResumePhiVPI = - cast(*find_if(EpiRedResult->users(), IsResumePhi)); + cast(*find_if(EpiRedResult->users(), IsaPred)); auto *EpiResumePhi = cast(State.get(EpiResumePhiVPI, true)); EpiResumePhi->setIncomingValueForBlock( BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock)); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index fa7a9aa5549ee..5394472381454 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -151,6 +151,7 @@ TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) { VPBasicBlock *VPBB1 = Plan.getEntry(); VPBasicBlock *VPBB2 = Plan.createVPBasicBlock(""); VPBasicBlock *VPBB3 = Plan.createVPBasicBlock(""); + VPBasicBlock *VPBB4 = Plan.createVPBasicBlock(""); VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero}); VPPhi *Phi = new VPPhi({DefI}, {}); @@ -162,6 +163,7 @@ TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) { VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB3, VPBB3, "R1"); VPBlockUtils::connectBlocks(VPBB1, VPBB2); VPBlockUtils::connectBlocks(VPBB2, R1); + VPBlockUtils::connectBlocks(VPBB4, Plan.getScalarHeader()); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); #endif