diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cbfbc29360b0b..9b91b520b4333 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1423,6 +1423,11 @@ class LoopVectorizationCostModel {
     return InLoopReductions.contains(Phi);
   }
 
+  /// Returns the set of in-loop reduction PHIs.
+  const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
+    return InLoopReductions;
+  }
+
   /// Returns true if the predicated reduction select should be used to set the
   /// incoming value for the reduction phi.
  bool usePredicatedReductionSelect() const {
@@ -7626,58 +7631,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
                                 VPIRMetadata(*Store, LVer), VPI->getDebugLoc());
 }
 
-/// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
-/// also insert a recipe to expand the step for the induction recipe.
-static VPWidenIntOrFpInductionRecipe *
-createWidenInductionRecipes(VPInstruction *PhiR,
-                            const InductionDescriptor &IndDesc, VPlan &Plan,
-                            ScalarEvolution &SE, Loop &OrigLoop) {
-  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
-         "step must be loop invariant");
-
-  VPValue *Start = PhiR->getOperand(0);
-  assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
-         "Start VPValue must match IndDesc's start value");
-
-  VPValue *Step =
-      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
-
-  // Update wide induction increments to use the same step as the corresponding
-  // wide induction. This enables detecting induction increments directly in
-  // VPlan and removes redundant splats.
-  using namespace llvm::VPlanPatternMatch;
-  if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
-    PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
-
-  PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
-  return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
-                                           IndDesc, PhiR->getDebugLoc());
-}
-
-VPHeaderPHIRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI, VFRange &Range) {
-  auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());
-
-  // Check if this is an integer or fp induction. If so, build the recipe that
-  // produces its scalar and vector values.
-  if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
-    return createWidenInductionRecipes(VPI, *II, Plan, *PSE.getSE(), *OrigLoop);
-
-  // Check if this is pointer induction. If so, build the recipe for it.
-  if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
-    VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
-    return new VPWidenPointerInductionRecipe(
-        Phi, VPI->getOperand(0), Step, &Plan.getVFxUF(), *II,
-        LoopVectorizationPlanner::getDecisionAndClampRange(
-            [&](ElementCount VF) {
-              return CM.isScalarAfterVectorization(Phi, VF);
-            },
-            Range),
-        VPI->getDebugLoc());
-  }
-  return nullptr;
-}
-
 VPWidenIntOrFpInductionRecipe *
 VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
                                                 VFRange &Range) {
@@ -8154,45 +8107,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
 
   // First, check for specific widening recipes that deal with inductions, Phi
   // nodes, calls and memory operations.
   VPRecipeBase *Recipe;
-  if (auto *PhiR = dyn_cast<VPPhi>(R)) {
-    VPBasicBlock *Parent = PhiR->getParent();
-    [[maybe_unused]] VPRegionBlock *LoopRegionOf =
-        Parent->getEnclosingLoopRegion();
-    assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
-           "Non-header phis should have been handled during predication");
-    auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
-    assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
-    if ((Recipe = tryToOptimizeInductionPHI(PhiR, Range)))
-      return Recipe;
-
-    VPHeaderPHIRecipe *PhiRecipe = nullptr;
-    assert((Legal->isReductionVariable(Phi) ||
-            Legal->isFixedOrderRecurrence(Phi)) &&
-           "can only widen reductions and fixed-order recurrences here");
-    VPValue *StartV = R->getOperand(0);
-    if (Legal->isReductionVariable(Phi)) {
-      const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
-      assert(RdxDesc.getRecurrenceStartValue() ==
-             Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
-
-      // If the PHI is used by a partial reduction, set the scale factor.
-      unsigned ScaleFactor =
-          getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
-      PhiRecipe = new VPReductionPHIRecipe(
-          Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
-          CM.useOrderedReductions(RdxDesc), ScaleFactor);
-    } else {
-      // TODO: Currently fixed-order recurrences are modeled as chains of
-      // first-order recurrences. If there are no users of the intermediate
-      // recurrences in the chain, the fixed order recurrence should be modeled
-      // directly, enabling more efficient codegen.
-      PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
-    }
-    // Add backedge value.
-    PhiRecipe->addOperand(R->getOperand(1));
-    return PhiRecipe;
-  }
-  assert(!R->isPhi() && "only VPPhi nodes expected at this point");
+  assert(!R->isPhi() && "phis must be handled earlier");
 
   auto *VPI = cast<VPInstruction>(R);
   Instruction *Instr = R->getUnderlyingInstr();
@@ -8249,6 +8164,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
   if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
     std::swap(BinOp, Accumulator);
 
+  if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
+    RedPhiR->setVFScaleFactor(ScaleFactor);
+
   assert(ScaleFactor ==
              vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
          "all accumulators in chain must have same scale factor");
@@ -8295,6 +8213,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       OrigLoop, *LI, Legal->getWidestInductionType(),
       getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE);
 
+  // Create recipes for header phis.
+  VPlanTransforms::createHeaderPhiRecipes(
+      *VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
+      Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
+      CM.getInLoopReductions(), Hints.allowReordering());
+
   auto MaxVFTimes2 = MaxVF * 2;
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
@@ -8415,25 +8339,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // Mapping from VPValues in the initial plan to their widened VPValues. Needed
   // temporarily to update created block masks.
   DenseMap<VPValue *, VPValue *> Old2New;
+
+  // Now process all other blocks and instructions.
 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
     // Convert input VPInstructions to widened recipes.
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      auto *SingleDef = cast<VPSingleDefRecipe>(&R);
-      auto *UnderlyingValue = SingleDef->getUnderlyingValue();
-      // Skip recipes that do not need transforming, including canonical IV,
-      // wide canonical IV and VPInstructions without underlying values. The
-      // latter are added above for masking.
-      // FIXME: Migrate code relying on the underlying instruction from VPlan0
-      // to construct recipes below to not use the underlying instruction.
-      if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
-              &R) ||
-          (isa<VPInstruction>(&R) && !UnderlyingValue))
+      auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
+      if (!SingleDef || !SingleDef->getUnderlyingValue())
         continue;
-      assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
 
       // TODO: Gradually replace uses of underlying instruction by analyses on
       // VPlan.
-      Instruction *Instr = cast<Instruction>(UnderlyingValue);
+      Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
       Builder.setInsertPoint(SingleDef);
 
       // The stores with invariant address inside the loop will be deleted, and
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index a7000aff06379..367b42d72633d 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -98,11 +98,6 @@ class VPRecipeBuilder {
   /// recipe that takes an additional VPInstruction for the mask.
   VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
 
-  /// Check if an induction recipe should be constructed for \p VPI. If so build
-  /// and return it. If not, return null.
-  VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI,
-                                               VFRange &Range);
-
   /// Optimize the special case where the operand of \p VPI is a constant
   /// integer induction variable.
   VPWidenIntOrFpInductionRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ea88eaa42d945..b8522e7305ca0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1084,7 +1084,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
   OpcodeTy Opcode;
 
   /// An optional name that can be used for the generated IR instruction.
-  const std::string Name;
+  std::string Name;
 
   /// Returns true if we can generate a scalar for the first lane only if
   /// needed.
@@ -1183,6 +1183,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
 
   /// Returns the symbolic name assigned to the VPInstruction.
   StringRef getName() const { return Name; }
+
+  void setName(StringRef NewName) { Name = NewName.str(); }
 };
 
 /// A specialization of VPInstruction augmenting it with a dedicated result
@@ -2211,19 +2213,15 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
 };
 
 class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
-  bool IsScalarAfterVectorization;
-
 public:
   /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
   /// Start and the number of elements unrolled \p NumUnrolledElems, typically
   /// VF*UF.
   VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                 VPValue *NumUnrolledElems,
-                                const InductionDescriptor &IndDesc,
-                                bool IsScalarAfterVectorization, DebugLoc DL)
+                                const InductionDescriptor &IndDesc, DebugLoc DL)
       : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
-                               Step, IndDesc, DL),
-        IsScalarAfterVectorization(IsScalarAfterVectorization) {
+                               Step, IndDesc, DL) {
     addOperand(NumUnrolledElems);
   }
 
@@ -2232,8 +2230,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
   VPWidenPointerInductionRecipe *clone() override {
     return new VPWidenPointerInductionRecipe(
         cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
-        getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
-        getDebugLoc());
+        getOperand(2), getInductionDescriptor(), getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -2309,8 +2306,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
   VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
 
   VPFirstOrderRecurrencePHIRecipe *clone() override {
-    return new VPFirstOrderRecurrencePHIRecipe(
+    auto *R = new VPFirstOrderRecurrencePHIRecipe(
         cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
+    R->addOperand(getOperand(1));
+    return R;
   }
 
   void execute(VPTransformState &State) override;
@@ -2379,6 +2378,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
   /// Get the factor that the VF of this recipe's output should be scaled by.
   unsigned getVFScaleFactor() const { return VFScaleFactor; }
 
+  void setVFScaleFactor(unsigned ScaleFactor) { VFScaleFactor = ScaleFactor; }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 4ffd5577d31a4..d517271b868f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -533,6 +533,15 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
     Plan.getEntry()->swapSuccessors();
 
   createExtractsForLiveOuts(Plan, MiddleVPBB);
+
+  VPBuilder ScalarPHBuilder(ScalarPH);
+  for (const auto &[PhiR, ScalarPhiR] : zip_equal(
+           drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
+    auto *VectorPhiR = cast<VPPhi>(&PhiR);
+    auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
+        {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
+    cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
+  }
 }
 
 std::unique_ptr<VPlan>
 VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
@@ -544,6 +553,93 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
   return VPlan0;
 }
 
+/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
+/// for \p Phi based on \p IndDesc.
+static VPHeaderPHIRecipe *
+createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR,
+                           const InductionDescriptor &IndDesc, VPlan &Plan,
+                           ScalarEvolution &SE, Loop &OrigLoop) {
+  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
+         "step must be loop invariant");
+
+  VPValue *Start = PhiR->getOperand(0);
+  assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
+         "Start VPValue must match IndDesc's start value");
+  VPValue *Step =
+      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
+
+  if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
+    return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
+                                             IndDesc, PhiR->getDebugLoc());
+
+  // Update wide induction increments to use the same step as the corresponding
+  // wide induction. This enables detecting induction increments directly in
+  // VPlan and removes redundant splats.
+  using namespace llvm::VPlanPatternMatch;
+  if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
+    PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
+
+  return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
+                                           IndDesc, PhiR->getDebugLoc());
+}
+
+void VPlanTransforms::createHeaderPhiRecipes(
+    VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+    const MapVector<PHINode *, InductionDescriptor> &Inductions,
+    const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+    const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+    const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
+
+  VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+      Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
+
+  for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
+    if (isa<VPCanonicalIVPHIRecipe>(&R))
+      continue;
+    auto *PhiR = dyn_cast<VPPhi>(&R);
+    if (!PhiR)
+      break;
+
+    // TODO: Gradually replace uses of underlying instruction by analyses on
+    // VPlan.
+    auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
+    assert(PhiR->getNumOperands() == 2 &&
+           "Must have 2 operands for header phis");
+
+    VPHeaderPHIRecipe *HeaderPhiR = nullptr;
+    auto InductionIt = Inductions.find(Phi);
+    if (InductionIt != Inductions.end()) {
+      HeaderPhiR = createWidenInductionRecipe(Phi, PhiR, InductionIt->second,
+                                              Plan, SE, OrigLoop);
+    } else {
+      VPValue *Start = PhiR->getOperand(0);
+      auto ReductionIt = Reductions.find(Phi);
+      if (ReductionIt != Reductions.end()) {
+        const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
+        assert(RdxDesc.getRecurrenceStartValue() ==
+               Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+
+        bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
+        HeaderPhiR = new VPReductionPHIRecipe(
+            Phi, RdxDesc.getRecurrenceKind(), *Start,
+            InLoopReductions.contains(Phi), UseOrderedReductions);
+      } else {
+        assert(FixedOrderRecurrences.contains(Phi) &&
+               "can only widen reductions and fixed-order recurrences here");
+        // TODO: Currently fixed-order recurrences are modeled as chains of
+        // first-order recurrences. If there are no users of the intermediate
+        // recurrences in the chain, the fixed order recurrence should be
+        // modeled directly, enabling more efficient codegen.
+        HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
+      }
+      HeaderPhiR->addOperand(PhiR->getOperand(1));
+    }
+    HeaderPhiR->insertBefore(PhiR);
+    PhiR->replaceAllUsesWith(HeaderPhiR);
+    PhiR->eraseFromParent();
+  }
+}
+
 void VPlanTransforms::handleEarlyExits(VPlan &Plan,
                                        bool HasUncountableEarlyExit) {
   auto *MiddleVPBB = cast<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa85bd435ee9e..5071941eb1413 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -4310,7 +4310,7 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
-  return IsScalarAfterVectorization &&
+  return vputils::onlyScalarValuesUsed(this) &&
          (!IsScalable || vputils::onlyFirstLaneUsed(this));
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d05c22e3aeb61..b80c43661c53c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4459,9 +4459,10 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
 /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
 /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
 /// the end value of the induction.
-static VPInstruction *addResumePhiRecipeForInduction(
-    VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
-    VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
+static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
+                                               VPBuilder &VectorPHBuilder,
+                                               VPTypeAnalysis &TypeInfo,
+                                               VPValue *VectorTC) {
   auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
   // Truncated wide inductions resume from the last lane of their vector value
   // in the last vector iteration which is handled elsewhere.
@@ -4487,9 +4488,7 @@ static VPInstruction *addResumePhiRecipeForInduction(
                                       WideIV->getDebugLoc());
   }
 
-  auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
-      {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
-  return ResumePhiRecipe;
+  return EndValue;
 }
 
 void VPlanTransforms::addScalarResumePhis(
@@ -4502,21 +4501,18 @@ void VPlanTransforms::addScalarResumePhis(
   VPBuilder VectorPHBuilder(
       cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
   VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
-  VPBuilder ScalarPHBuilder(ScalarPH);
-  for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader()->phis()) {
-    auto *ScalarPhiIRI = cast<VPIRPhi>(&ScalarPhiR);
+  for (VPRecipeBase &PhiR : Plan.getScalarPreheader()->phis()) {
+    auto *ResumePhiR = cast<VPPhi>(&PhiR);
 
     // TODO: Extract final value from induction recipe initially, optimize to
     // pre-computed end value together in optimizeInductionExitUsers.
-    auto *VectorPhiR =
-        cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
+    auto *VectorPhiR = cast<VPHeaderPHIRecipe>(ResumePhiR->getOperand(0));
     if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
-      if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
-              WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
-              &Plan.getVectorTripCount())) {
-        assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
-        IVEndValues[WideIVR] = ResumePhi->getOperand(0);
-        ScalarPhiIRI->addOperand(ResumePhi);
+      if (VPValue *ResumeV = addResumePhiRecipeForInduction(
+              WideIVR, VectorPHBuilder, TypeInfo, &Plan.getVectorTripCount())) {
+        IVEndValues[WideIVR] = ResumeV;
+        ResumePhiR->setOperand(0, ResumeV);
+        ResumePhiR->setName("bc.resume.val");
         continue;
       }
       // TODO: Also handle truncated inductions here. Computing end-values
@@ -4538,10 +4534,8 @@ void VPlanTransforms::addScalarResumePhis(
       ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
          VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
          "vector.recur.extract");
-    StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
-    auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
-        {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
-    ScalarPhiIRI->addOperand(ResumePhiR);
+    ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
+    ResumePhiR->setOperand(0, ResumeFromVectorLoop);
   }
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index e3bde8a47dcbc..84224be96ab68 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -101,6 +101,17 @@ struct VPlanTransforms {
   buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
               PredicatedScalarEvolution &PSE);
 
+  /// Replace VPPhi recipes in \p Plan's header with corresponding
+  /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
+  /// fixed-order recurrences. This processes all header phis and creates
+  /// the appropriate widened recipe for each one.
+  static void createHeaderPhiRecipes(
+      VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+      const MapVector<PHINode *, InductionDescriptor> &Inductions,
+      const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+      const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+      const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
+
   /// Update \p Plan to account for all early exits.
   LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
                                                  bool HasUncountableExit);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index d23e3c29b59e5..28b2d30b03fd6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -11,21 +11,14 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
 ; CHECK:       vector.main.loop.iter.check:
 ; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 10000
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]]
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> splat (i64 2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP5]], zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
@@ -40,25 +33,24 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
 ; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
 ; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
 ; CHECK-NEXT:    [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
-; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX6]], 0
-; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX6]], 1
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
-; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP7]], i32 0
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP8]], i32 1
+; CHECK-NEXT:    [[POINTER_PHI2:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[POINTER_PHI2]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = extractelement <2 x ptr> [[TMP20]], i32 0
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
@@ -66,6 +58,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP23]])
 ; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[NEXT_GEP7]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
+; CHECK-NEXT:    [[PTR_IND5]] = getelementptr i8, ptr [[POINTER_PHI2]], i64 2
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 10000
 ; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
@@ -136,7 +129,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2
@@ -221,7 +214,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK:       vec.epilog.iter.check:
 ; CHECK-NEXT:    [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC]]
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -304,7 +297,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2
@@ -416,7 +409,7 @@ define void @test_widen_truncated_induction(ptr %A) {
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index f223786a07cdf..456c03824106a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -112,15 +112,11 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP12]]
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX2]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP15]], align 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP17]], ptr [[TMP15]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP4]]
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll
index 3a07bcca523ce..13da121fe2dc2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll
@@ -25,17 +25,14 @@ define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP7]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[TMP8]], i32 1
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 16>
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP10]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 ; CHECK-NEXT:    store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
index cebf90ae9f8ce..94392f856c386 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
@@ -15,16 +15,20 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[SRC]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 8, i32 16, i32 24>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 1
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 2
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], 8
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 8
+; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 16
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 24
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP16]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP17]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP2]]
@@ -36,7 +40,6 @@ define void
@wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s
 ; CHECK-NEXT:    store ptr [[TMP13]], ptr [[TMP9]], align 8
 ; CHECK-NEXT:    store ptr [[TMP14]], ptr [[TMP10]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 32
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index b99d656c5c50f..2cc6fdb96c74e 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -113,12 +113,13 @@ compound=true
 N0 -> N2 [ label="F"]
 N1 [label =
     "scalar.ph:\l" +
+    "  EMIT-SCALAR vp\<%6\> = phi [ ir\<%indvars.iv\>, middle.block ], [ ir\<0\>, ir-bb\<entry\> ]\l" +
     "Successor(s): ir-bb\<for.body\>\l"
 ]
 N1 -> N3 [ label=""]
 N3 [label =
     "ir-bb\<for.body\>:\l" +
-    "  IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" +
+    "  IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp\<%6\> from scalar.ph)\l" +
     "  IR   %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" +
     "  IR   %l1 = load i32, ptr %arr.idx, align 4\l" +
     "  IR   %res = add i32 %l1, 10\l" +
@@ -282,12 +283,13 @@ compound=true
 N0 -> N2 [ label="F"]
 N1 [label =
     "scalar.ph:\l" +
+    "  EMIT-SCALAR vp\<%6\> = phi [ ir\<%iv\>, middle.block ], [ ir\<0\>, ir-bb\<entry\> ]\l" +
     "Successor(s): ir-bb\<loop.header\>\l"
 ]
 N1 -> N3 [ label=""]
 N3 [label =
     "ir-bb\<loop.header\>:\l" +
-    "  IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]\l" +
+    "  IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp\<%6\> from scalar.ph)\l" +
     "  IR   %arr.idx = getelementptr inbounds i32, ptr %A, i64 %iv\l" +
     "  IR   %l1 = load i32, ptr %arr.idx, align 4\l" +
     "  IR   %c = icmp eq i32 %l1, 0\l" +
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index 169114ed6c310..cca779142bce4 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -346,6 +346,8 @@ TEST_F(VPIRVerifierTest, testVerifyIRPhiInScalarHeaderVPIRBB) {
   Function *F = M.getFunction("f");
   BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
   auto Plan = buildVPlan(LoopHeader);
+  VPValue *Zero = Plan->getConstantInt(32, 0);
+  Plan->getScalarHeader()->front().addOperand(Zero);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -387,8 +389,6 @@ TEST_F(VPIRVerifierTest, testVerifyIRPhiInExitVPIRBB) {
                              {HeaderBlock->front().getVPSingleValue()});
   DefI->insertBefore(Plan->getMiddleBlock()->getTerminator());
   Plan->getExitBlocks()[0]->front().addOperand(DefI);
-  VPValue *Zero = Plan->getConstantInt(32, 0);
-  Plan->getScalarHeader()->front().addOperand(Zero);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();