@@ -1027,6 +1027,7 @@ class LoopVectorizationCostModel {
10271027 CM_Widen_Reverse, // For consecutive accesses with stride -1.
10281028 CM_Interleave,
10291029 CM_GatherScatter,
1030+ CM_Compressed,
10301031 CM_Scalarize,
10311032 CM_VectorCall,
10321033 CM_IntrinsicCall
@@ -1240,9 +1241,9 @@ class LoopVectorizationCostModel {
12401241 getDivRemSpeculationCost (Instruction *I,
12411242 ElementCount VF) const ;
12421243
1243- /// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
1244- /// memory instruction with consecutive access that can be widened, or
1245- /// CM_Unknown otherwise.
1244+ /// Returns widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed) if
1245+ /// \p I is a memory instruction with consecutive access that can be widened,
1246+ /// or CM_Unknown otherwise.
12461247 InstWidening memoryInstructionCanBeWidened (Instruction *I, ElementCount VF);
12471248
12481249 // / Returns true if \p I is a memory instruction in an interleaved-group
@@ -2999,6 +3000,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
29993000 auto *Ptr = getLoadStorePointerOperand (I);
30003001 auto *ScalarTy = getLoadStoreType (I);
30013002
3003+ if (Legal->isCompressedPtr (ScalarTy, Ptr, I->getParent ()))
3004+ return CM_Compressed;
3005+
30023006 // In order to be widened, the pointer should be consecutive, first of all.
30033007 auto Stride = Legal->isConsecutivePtr (ScalarTy, Ptr);
30043008 if (!Stride)
@@ -3108,9 +3112,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
31083112 if (IsUniformMemOpUse (I))
31093113 return true ;
31103114
3111- return (WideningDecision == CM_Widen ||
3112- WideningDecision == CM_Widen_Reverse ||
3113- WideningDecision == CM_Interleave );
3115+ return (
3116+ WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse ||
3117+ WideningDecision == CM_Interleave || WideningDecision == CM_Compressed );
31143118 };
31153119
31163120 // Returns true if Ptr is the pointer operand of a memory access instruction
@@ -3255,6 +3259,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
32553259 AddToWorklistIfAllowed (IndUpdate);
32563260 }
32573261
3262+ // Handle monotonic phis (similarly to induction vars).
3263+ for (const auto &MonotonicPHI : Legal->getMonotonicPHIs ()) {
3264+ auto *Phi = MonotonicPHI.first ;
3265+ auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock (Latch));
3266+ const auto &Desc = MonotonicPHI.second ;
3267+
3268+ auto UniformPhi = llvm::all_of (Phi->users (), [&](User *U) -> bool {
3269+ auto *I = cast<Instruction>(U);
3270+ if (I == Desc.getStepInst ())
3271+ return true ;
3272+ if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain ().contains (PN))
3273+ return true ;
3274+ return !TheLoop->contains (I) || Worklist.count (I) ||
3275+ IsVectorizedMemAccessUse (I, Phi);
3276+ });
3277+ if (!UniformPhi)
3278+ continue ;
3279+
3280+ auto UniformPhiUpdate =
3281+ llvm::all_of (PhiUpdate->users (), [&](User *U) -> bool {
3282+ auto *I = cast<Instruction>(U);
3283+ if (I == Phi)
3284+ return true ;
3285+ return !TheLoop->contains (I) || Worklist.count (I) ||
3286+ IsVectorizedMemAccessUse (I, Phi);
3287+ });
3288+ if (!UniformPhiUpdate)
3289+ continue ;
3290+
3291+ AddToWorklistIfAllowed (Phi);
3292+ AddToWorklistIfAllowed (PhiUpdate);
3293+ }
3294+
32583295 Uniforms[VF].insert_range (Worklist);
32593296}
32603297
@@ -4046,6 +4083,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40464083 case VPDef::VPEVLBasedIVPHISC:
40474084 case VPDef::VPPredInstPHISC:
40484085 case VPDef::VPBranchOnMaskSC:
4086+ case VPDef::VPMonotonicPHISC:
40494087 continue ;
40504088 case VPDef::VPReductionSC:
40514089 case VPDef::VPActiveLaneMaskPHISC:
@@ -4559,6 +4597,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45594597 if (Plan.hasEarlyExit ())
45604598 return 1 ;
45614599
4600+ // Monotonic vars don't support interleaving.
4601+ if (Legal->hasMonotonicPHIs ())
4602+ return 1 ;
4603+
45624604 const bool HasReductions =
45634605 any_of (Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis (),
45644606 IsaPred<VPReductionPHIRecipe>);
@@ -5191,12 +5233,17 @@ InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
51915233 Instruction *I, ElementCount VF, InstWidening Decision) {
51925234 Type *ValTy = getLoadStoreType (I);
51935235 auto *VectorTy = cast<VectorType>(toVectorTy (ValTy, VF));
5236+ const Align Alignment = getLoadStoreAlignment (I);
51945237 unsigned AS = getLoadStoreAddressSpace (I);
51955238 enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
51965239
5240+ if (Decision == CM_Compressed)
5241+ return TTI.getExpandCompressMemoryOpCost (I->getOpcode (), VectorTy,
5242+ /* VariableMask*/ true , Alignment,
5243+ CostKind, I);
5244+
51975245 assert ((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
51985246 " Expected widen decision." );
5199- const Align Alignment = getLoadStoreAlignment (I);
52005247 InstructionCost Cost = 0 ;
52015248 if (Legal->isMaskRequired (I)) {
52025249 Cost += TTI.getMaskedMemoryOpCost (I->getOpcode (), VectorTy, Alignment, AS,
@@ -6299,6 +6346,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
62996346 switch (getWideningDecision (I, VF)) {
63006347 case LoopVectorizationCostModel::CM_GatherScatter:
63016348 return TTI::CastContextHint::GatherScatter;
6349+ case LoopVectorizationCostModel::CM_Compressed:
6350+ return TTI::CastContextHint::Compressed;
63026351 case LoopVectorizationCostModel::CM_Interleave:
63036352 return TTI::CastContextHint::Interleave;
63046353 case LoopVectorizationCostModel::CM_Scalarize:
@@ -7514,8 +7563,9 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75147563 LoopVectorizationCostModel::InstWidening Decision =
75157564 CM.getWideningDecision (I, Range.Start );
75167565 bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
7566+ bool Compressed = Decision == LoopVectorizationCostModel::CM_Compressed;
75177567 bool Consecutive =
7518- Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
7568+ Reverse || Compressed || Decision == LoopVectorizationCostModel::CM_Widen;
75197569
75207570 VPValue *Ptr = isa<LoadInst>(I) ? Operands[0 ] : Operands[1 ];
75217571 if (Consecutive) {
@@ -7545,11 +7595,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75457595 }
75467596 if (LoadInst *Load = dyn_cast<LoadInst>(I))
75477597 return new VPWidenLoadRecipe (*Load, Ptr, Mask, Consecutive, Reverse,
7548- VPIRMetadata (*Load, LVer), I->getDebugLoc ());
7598+ Compressed, VPIRMetadata (*Load, LVer),
7599+ I->getDebugLoc ());
75497600
75507601 StoreInst *Store = cast<StoreInst>(I);
75517602 return new VPWidenStoreRecipe (*Store, Ptr, Operands[0 ], Mask, Consecutive,
7552- Reverse, VPIRMetadata (*Store, LVer),
7603+ Reverse, Compressed, VPIRMetadata (*Store, LVer),
75537604 I->getDebugLoc ());
75547605}
75557606
@@ -8064,11 +8115,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
80648115 return Recipe;
80658116
80668117 VPHeaderPHIRecipe *PhiRecipe = nullptr ;
8067- assert ((Legal->isReductionVariable (Phi) ||
8118+ assert ((Legal->isMonotonicPHI (Phi) || Legal-> isReductionVariable (Phi) ||
80688119 Legal->isFixedOrderRecurrence (Phi)) &&
8069- " can only widen reductions and fixed-order recurrences here" );
8120+ " can only widen monotonic phis, reductions and fixed-order "
8121+ " recurrences here" );
80708122 VPValue *StartV = Operands[0 ];
8071- if (Legal->isReductionVariable (Phi)) {
8123+ Value *IncomingVal =
8124+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ());
8125+ if (Legal->isMonotonicPHI (Phi)) {
8126+ const MonotonicDescriptor &Desc =
8127+ Legal->getMonotonicPHIs ().find (Phi)->second ;
8128+ assert (Desc.getExpr ()->getStart () == PSE.getSCEV (IncomingVal));
8129+ PhiRecipe = new VPMonotonicPHIRecipe (Phi, Desc, StartV);
8130+ } else if (Legal->isReductionVariable (Phi)) {
80728131 const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor (Phi);
80738132 assert (RdxDesc.getRecurrenceStartValue () ==
80748133 Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
@@ -8419,6 +8478,46 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84198478 // bring the VPlan to its final state.
84208479 // ---------------------------------------------------------------------------
84218480
8481+ // Adjust the recipes for any monotonic phis.
8482+ for (VPRecipeBase &R : HeaderVPBB->phis ()) {
8483+ auto *MonotonicPhi = dyn_cast<VPMonotonicPHIRecipe>(&R);
8484+ if (!MonotonicPhi)
8485+ continue ;
8486+
8487+ // Prohibit scalarization of monotonic phis.
8488+ if (!all_of (Range, [&](ElementCount VF) {
8489+ return CM.isUniformAfterVectorization (
8490+ MonotonicPhi->getUnderlyingInstr (), VF);
8491+ }))
8492+ return nullptr ;
8493+
8494+ // Obtain mask value for the predicate edge from the last VPBlendRecipe in
8495+ // chain.
8496+ VPValue *Chain = MonotonicPhi->getBackedgeValue ();
8497+ VPValue *Mask = nullptr ;
8498+ while (auto *BlendR = dyn_cast<VPBlendRecipe>(Chain))
8499+ for (unsigned I = 0 , E = BlendR->getNumIncomingValues (); I != E; ++I)
8500+ if (auto *IncomingVal = BlendR->getIncomingValue (I);
8501+ IncomingVal != MonotonicPhi) {
8502+ Chain = IncomingVal;
8503+ Mask = BlendR->getMask (I);
8504+ break ;
8505+ }
8506+ assert (Mask);
8507+
8508+ auto &Desc = MonotonicPhi->getDescriptor ();
8509+ auto &SE = *PSE.getSE ();
8510+ auto *Step = vputils::getOrCreateVPValueForSCEVExpr (
8511+ *Plan, Desc.getExpr ()->getStepRecurrence (SE));
8512+
8513+ auto *MonotonicI =
8514+ new VPInstruction (VPInstruction::ComputeMonotonicResult,
8515+ {MonotonicPhi, Mask, Step}, *Desc.getStepInst ());
8516+ auto *InsertBlock = MonotonicPhi->getBackedgeRecipe ().getParent ();
8517+ InsertBlock->insert (MonotonicI, InsertBlock->getFirstNonPhi ());
8518+ MonotonicPhi->getBackedgeValue ()->replaceAllUsesWith (MonotonicI);
8519+ }
8520+
84228521 // Adjust the recipes for any inloop reductions.
84238522 adjustRecipesForReductions (Plan, RecipeBuilder, Range.Start );
84248523
@@ -9881,6 +9980,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98819980 IC = LVP.selectInterleaveCount (LVP.getPlanFor (VF.Width ), VF.Width , VF.Cost );
98829981
98839982 unsigned SelectedIC = std::max (IC, UserIC);
9983+
9984+ if (LVL.hasMonotonicPHIs () && SelectedIC > 1 ) {
9985+ reportVectorizationFailure (
9986+ " Interleaving of loop with monotonic vars" ,
9987+ " Interleaving of loops with monotonic vars is not supported" ,
9988+ " CantInterleaveWithMonotonicVars" , ORE, L);
9989+ return false ;
9990+ }
9991+
98849992 // Optimistically generate runtime checks if they are needed. Drop them if
98859993 // they turn out to not be profitable.
98869994 if (VF.Width .isVector () || SelectedIC > 1 ) {
0 commit comments