@@ -1241,9 +1241,9 @@ class LoopVectorizationCostModel {
12411241 getDivRemSpeculationCost (Instruction *I,
12421242 ElementCount VF) const ;
12431243
1244- // / Returns widening decision (CM_Widen or CM_Widen_Reverse ) if \p I is a
1245- // / memory instruction with consecutive access that can be widened, or
1246- // / CM_Unknown otherwise.
1244+ // / Returns widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed ) if
1245+ // / \p I is a memory instruction with a consecutive or compressed access that
1246+ // / can be widened, or CM_Unknown otherwise.
12471247 InstWidening memoryInstructionCanBeWidened (Instruction *I, ElementCount VF);
12481248
12491249 // / Returns true if \p I is a memory instruction in an interleaved-group
@@ -3000,6 +3000,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
30003000 auto *Ptr = getLoadStorePointerOperand (I);
30013001 auto *ScalarTy = getLoadStoreType (I);
30023002
3003+ if (Legal->isCompressedPtr (ScalarTy, Ptr, I->getParent ()))
3004+ return CM_Compressed;
3005+
30033006 // In order to be widened, the pointer should be consecutive, first of all.
30043007 auto Stride = Legal->isConsecutivePtr (ScalarTy, Ptr);
30053008 if (!Stride)
@@ -3257,6 +3260,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
32573260 AddToWorklistIfAllowed (IndUpdate);
32583261 }
32593262
3263+ // Handle monotonic phis (similarly to induction vars).
3264+ for (const auto &MonotonicPHI : Legal->getMonotonicPHIs ()) {
3265+ auto *Phi = MonotonicPHI.first ;
3266+ auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock (Latch));
3267+ const auto &Desc = MonotonicPHI.second ;
3268+
3269+ auto UniformPhi = llvm::all_of (Phi->users (), [&](User *U) -> bool {
3270+ auto *I = cast<Instruction>(U);
3271+ if (I == Desc.getStepInst ())
3272+ return true ;
3273+ if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain ().contains (PN))
3274+ return true ;
3275+ return !TheLoop->contains (I) || Worklist.count (I) ||
3276+ IsVectorizedMemAccessUse (I, Phi);
3277+ });
3278+ if (!UniformPhi)
3279+ continue ;
3280+
3281+ auto UniformPhiUpdate =
3282+ llvm::all_of (PhiUpdate->users (), [&](User *U) -> bool {
3283+ auto *I = cast<Instruction>(U);
3284+ if (I == Phi)
3285+ return true ;
3286+ return !TheLoop->contains (I) || Worklist.count (I) ||
3287+ IsVectorizedMemAccessUse (I, Phi);
3288+ });
3289+ if (!UniformPhiUpdate)
3290+ continue ;
3291+
3292+ AddToWorklistIfAllowed (Phi);
3293+ AddToWorklistIfAllowed (PhiUpdate);
3294+ }
3295+
32603296 Uniforms[VF].insert_range (Worklist);
32613297}
32623298
@@ -4048,6 +4084,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40484084 case VPDef::VPEVLBasedIVPHISC:
40494085 case VPDef::VPPredInstPHISC:
40504086 case VPDef::VPBranchOnMaskSC:
4087+ case VPDef::VPMonotonicPHISC:
40514088 continue ;
40524089 case VPDef::VPReductionSC:
40534090 case VPDef::VPActiveLaneMaskPHISC:
@@ -4561,6 +4598,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45614598 if (Plan.hasEarlyExit ())
45624599 return 1 ;
45634600
4601+ // Monotonic vars don't support interleaving.
4602+ if (Legal->hasMonotonicPHIs ())
4603+ return 1 ;
4604+
45644605 const bool HasReductions =
45654606 any_of (Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis (),
45664607 IsaPred<VPReductionPHIRecipe>);
@@ -8075,11 +8116,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
80758116 return Recipe;
80768117
80778118 VPHeaderPHIRecipe *PhiRecipe = nullptr ;
8078- assert ((Legal->isReductionVariable (Phi) ||
8119+ assert ((Legal->isMonotonicPHI (Phi) || Legal-> isReductionVariable (Phi) ||
80798120 Legal->isFixedOrderRecurrence (Phi)) &&
8080- " can only widen reductions and fixed-order recurrences here" );
8121+ " can only widen monotonic phis, reductions and fixed-order "
8122+ " recurrences here" );
80818123 VPValue *StartV = Operands[0 ];
8082- if (Legal->isReductionVariable (Phi)) {
8124+ Value *IncomingVal =
8125+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ());
8126+ if (Legal->isMonotonicPHI (Phi)) {
8127+ const MonotonicDescriptor &Desc =
8128+ Legal->getMonotonicPHIs ().find (Phi)->second ;
8129+ assert (Desc.getExpr ()->getStart () == PSE.getSCEV (IncomingVal));
8130+ PhiRecipe = new VPMonotonicPHIRecipe (Phi, Desc, StartV);
8131+ } else if (Legal->isReductionVariable (Phi)) {
80838132 const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor (Phi);
80848133 assert (RdxDesc.getRecurrenceStartValue () ==
80858134 Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
@@ -8430,6 +8479,46 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84308479 // bring the VPlan to its final state.
84318480 // ---------------------------------------------------------------------------
84328481
8482+ // Adjust the recipes for any monotonic phis.
8483+ for (VPRecipeBase &R : HeaderVPBB->phis ()) {
8484+ auto *MonotonicPhi = dyn_cast<VPMonotonicPHIRecipe>(&R);
8485+ if (!MonotonicPhi)
8486+ continue ;
8487+
8488+ // Prohibit scalarization of monotonic phis.
8489+ if (!all_of (Range, [&](ElementCount VF) {
8490+ return CM.isUniformAfterVectorization (
8491+ MonotonicPhi->getUnderlyingInstr (), VF);
8492+ }))
8493+ return nullptr ;
8494+
8495+ // Obtain mask value for the predicate edge from the last VPBlendRecipe in
8496+ // chain.
8497+ VPValue *Chain = MonotonicPhi->getBackedgeValue ();
8498+ VPValue *Mask = nullptr ;
8499+ while (auto *BlendR = dyn_cast<VPBlendRecipe>(Chain))
8500+ for (unsigned I = 0 , E = BlendR->getNumIncomingValues (); I != E; ++I)
8501+ if (auto *IncomingVal = BlendR->getIncomingValue (I);
8502+ IncomingVal != MonotonicPhi) {
8503+ Chain = IncomingVal;
8504+ Mask = BlendR->getMask (I);
8505+ break ;
8506+ }
8507+ assert (Mask);
8508+
8509+ auto &Desc = MonotonicPhi->getDescriptor ();
8510+ auto &SE = *PSE.getSE ();
8511+ auto *Step = vputils::getOrCreateVPValueForSCEVExpr (
8512+ *Plan, Desc.getExpr ()->getStepRecurrence (SE));
8513+
8514+ auto *MonotonicI =
8515+ new VPInstruction (VPInstruction::ComputeMonotonicResult,
8516+ {MonotonicPhi, Mask, Step}, *Desc.getStepInst ());
8517+ auto *InsertBlock = MonotonicPhi->getBackedgeRecipe ().getParent ();
8518+ InsertBlock->insert (MonotonicI, InsertBlock->getFirstNonPhi ());
8519+ MonotonicPhi->getBackedgeValue ()->replaceAllUsesWith (MonotonicI);
8520+ }
8521+
84338522 // Adjust the recipes for any inloop reductions.
84348523 adjustRecipesForReductions (Plan, RecipeBuilder, Range.Start );
84358524
@@ -9892,6 +9981,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98929981 IC = LVP.selectInterleaveCount (LVP.getPlanFor (VF.Width ), VF.Width , VF.Cost );
98939982
98949983 unsigned SelectedIC = std::max (IC, UserIC);
9984+
9985+ if (LVL.hasMonotonicPHIs () && SelectedIC > 1 ) {
9986+ reportVectorizationFailure (
9987+ " Interleaving of loop with monotonic vars" ,
9988+ " Interleaving of loops with monotonic vars is not supported" ,
9989+ " CantInterleaveWithMonotonicVars" , ORE, L);
9990+ return false ;
9991+ }
9992+
98959993 // Optimistically generate runtime checks if they are needed. Drop them if
98969994 // they turn out to not be profitable.
98979995 if (VF.Width .isVector () || SelectedIC > 1 ) {
0 commit comments