@@ -1241,9 +1241,9 @@ class LoopVectorizationCostModel {
12411241 getDivRemSpeculationCost (Instruction *I,
12421242 ElementCount VF) const ;
12431243
1244- // / Returns widening decision (CM_Widen or CM_Widen_Reverse ) if \p I is a
1245- // / memory instruction with consecutive access that can be widened, or
1246- // / CM_Unknown otherwise.
1244+ /// Returns the widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed)
1245+ /// if \p I is a memory instruction with consecutive or compressed access that
1246+ /// can be widened, or CM_Unknown otherwise.
12471247 InstWidening memoryInstructionCanBeWidened (Instruction *I, ElementCount VF);
12481248
12491249 // / Returns true if \p I is a memory instruction in an interleaved-group
@@ -3000,6 +3000,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
30003000 auto *Ptr = getLoadStorePointerOperand (I);
30013001 auto *ScalarTy = getLoadStoreType (I);
30023002
3003+ if (Legal->isCompressedPtr (ScalarTy, Ptr, I->getParent ()))
3004+ return CM_Compressed;
3005+
30033006 // In order to be widened, the pointer should be consecutive, first of all.
30043007 auto Stride = Legal->isConsecutivePtr (ScalarTy, Ptr);
30053008 if (!Stride)
@@ -3256,6 +3259,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
32563259 AddToWorklistIfAllowed (IndUpdate);
32573260 }
32583261
3262+ // Handle monotonic phis (similarly to induction vars).
3263+ for (const auto &MonotonicPHI : Legal->getMonotonicPHIs ()) {
3264+ auto *Phi = MonotonicPHI.first ;
3265+ auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock (Latch));
3266+ const auto &Desc = MonotonicPHI.second ;
3267+
3268+ auto UniformPhi = llvm::all_of (Phi->users (), [&](User *U) -> bool {
3269+ auto *I = cast<Instruction>(U);
3270+ if (I == Desc.getStepInst ())
3271+ return true ;
3272+ if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain ().contains (PN))
3273+ return true ;
3274+ return !TheLoop->contains (I) || Worklist.count (I) ||
3275+ IsVectorizedMemAccessUse (I, Phi);
3276+ });
3277+ if (!UniformPhi)
3278+ continue ;
3279+
3280+ auto UniformPhiUpdate =
3281+ llvm::all_of (PhiUpdate->users (), [&](User *U) -> bool {
3282+ auto *I = cast<Instruction>(U);
3283+ if (I == Phi)
3284+ return true ;
3285+ return !TheLoop->contains (I) || Worklist.count (I) ||
3286+ IsVectorizedMemAccessUse (I, Phi);
3287+ });
3288+ if (!UniformPhiUpdate)
3289+ continue ;
3290+
3291+ AddToWorklistIfAllowed (Phi);
3292+ AddToWorklistIfAllowed (PhiUpdate);
3293+ }
3294+
32593295 Uniforms[VF].insert_range (Worklist);
32603296}
32613297
@@ -4047,6 +4083,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40474083 case VPDef::VPEVLBasedIVPHISC:
40484084 case VPDef::VPPredInstPHISC:
40494085 case VPDef::VPBranchOnMaskSC:
4086+ case VPDef::VPMonotonicPHISC:
40504087 continue ;
40514088 case VPDef::VPReductionSC:
40524089 case VPDef::VPActiveLaneMaskPHISC:
@@ -4560,6 +4597,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45604597 if (Plan.hasEarlyExit ())
45614598 return 1 ;
45624599
4600+ // Monotonic vars don't support interleaving.
4601+ if (Legal->hasMonotonicPHIs ())
4602+ return 1 ;
4603+
45634604 const bool HasReductions =
45644605 any_of (Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis (),
45654606 IsaPred<VPReductionPHIRecipe>);
@@ -8074,11 +8115,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
80748115 return Recipe;
80758116
80768117 VPHeaderPHIRecipe *PhiRecipe = nullptr ;
8077- assert ((Legal->isReductionVariable (Phi) ||
8118+ assert ((Legal->isMonotonicPHI (Phi) || Legal-> isReductionVariable (Phi) ||
80788119 Legal->isFixedOrderRecurrence (Phi)) &&
8079- " can only widen reductions and fixed-order recurrences here" );
8120+ " can only widen monotonic phis, reductions and fixed-order "
8121+ " recurrences here" );
80808122 VPValue *StartV = Operands[0 ];
8081- if (Legal->isReductionVariable (Phi)) {
8123+ Value *IncomingVal =
8124+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ());
8125+ if (Legal->isMonotonicPHI (Phi)) {
8126+ const MonotonicDescriptor &Desc =
8127+ Legal->getMonotonicPHIs ().find (Phi)->second ;
8128+ assert (Desc.getExpr ()->getStart () == PSE.getSCEV (IncomingVal));
8129+ PhiRecipe = new VPMonotonicPHIRecipe (Phi, Desc, StartV);
8130+ } else if (Legal->isReductionVariable (Phi)) {
80828131 const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor (Phi);
80838132 assert (RdxDesc.getRecurrenceStartValue () ==
80848133 Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
@@ -8429,6 +8478,46 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84298478 // bring the VPlan to its final state.
84308479 // ---------------------------------------------------------------------------
84318480
8481+ // Adjust the recipes for any monotonic phis.
8482+ for (VPRecipeBase &R : HeaderVPBB->phis ()) {
8483+ auto *MonotonicPhi = dyn_cast<VPMonotonicPHIRecipe>(&R);
8484+ if (!MonotonicPhi)
8485+ continue ;
8486+
8487+ // Prohibit scalarization of monotonic phis.
8488+ if (!all_of (Range, [&](ElementCount VF) {
8489+ return CM.isUniformAfterVectorization (
8490+ MonotonicPhi->getUnderlyingInstr (), VF);
8491+ }))
8492+ return nullptr ;
8493+
8494+ // Obtain the mask value for the predicate edge from the last VPBlendRecipe
8495+ // in the chain.
8496+ VPValue *Chain = MonotonicPhi->getBackedgeValue ();
8497+ VPValue *Mask = nullptr ;
8498+ while (auto *BlendR = dyn_cast<VPBlendRecipe>(Chain))
8499+ for (unsigned I = 0 , E = BlendR->getNumIncomingValues (); I != E; ++I)
8500+ if (auto *IncomingVal = BlendR->getIncomingValue (I);
8501+ IncomingVal != MonotonicPhi) {
8502+ Chain = IncomingVal;
8503+ Mask = BlendR->getMask (I);
8504+ break ;
8505+ }
8506+ assert (Mask);
8507+
8508+ auto &Desc = MonotonicPhi->getDescriptor ();
8509+ auto &SE = *PSE.getSE ();
8510+ auto *Step = vputils::getOrCreateVPValueForSCEVExpr (
8511+ *Plan, Desc.getExpr ()->getStepRecurrence (SE));
8512+
8513+ auto *MonotonicI =
8514+ new VPInstruction (VPInstruction::ComputeMonotonicResult,
8515+ {MonotonicPhi, Mask, Step}, *Desc.getStepInst ());
8516+ auto *InsertBlock = MonotonicPhi->getBackedgeRecipe ().getParent ();
8517+ InsertBlock->insert (MonotonicI, InsertBlock->getFirstNonPhi ());
8518+ MonotonicPhi->getBackedgeValue ()->replaceAllUsesWith (MonotonicI);
8519+ }
8520+
84328521 // Adjust the recipes for any inloop reductions.
84338522 adjustRecipesForReductions (Plan, RecipeBuilder, Range.Start );
84348523
@@ -9891,6 +9980,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98919980 IC = LVP.selectInterleaveCount (LVP.getPlanFor (VF.Width ), VF.Width , VF.Cost );
98929981
98939982 unsigned SelectedIC = std::max (IC, UserIC);
9983+
9984+ if (LVL.hasMonotonicPHIs () && SelectedIC > 1 ) {
9985+ reportVectorizationFailure (
9986+ " Interleaving of loop with monotonic vars" ,
9987+ " Interleaving of loops with monotonic vars is not supported" ,
9988+ " CantInterleaveWithMonotonicVars" , ORE, L);
9989+ return false ;
9990+ }
9991+
98949992 // Optimistically generate runtime checks if they are needed. Drop them if
98959993 // they turn out to not be profitable.
98969994 if (VF.Width .isVector () || SelectedIC > 1 ) {
0 commit comments