@@ -1147,6 +1147,7 @@ class LoopVectorizationCostModel {
11471147 CM_Widen_Reverse, // For consecutive accesses with stride -1.
11481148 CM_Interleave,
11491149 CM_GatherScatter,
1150+ CM_Strided,
11501151 CM_Scalarize,
11511152 CM_VectorCall,
11521153 CM_IntrinsicCall
@@ -6160,6 +6161,17 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
61606161 " Expected consecutive stride." );
61616162 InstWidening Decision =
61626163 ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
6164+ // Consider using strided load/store for consecutive reverse accesses to
6165+ // achieve more efficient memory operations.
6166+ if (ConsecutiveStride == -1 ) {
6167+ const InstructionCost StridedLoadStoreCost =
6168+ isLegalStridedLoadStore (&I, VF) ? getStridedLoadStoreCost (&I, VF)
6169+ : InstructionCost::getInvalid ();
6170+ if (StridedLoadStoreCost < Cost) {
6171+ Decision = CM_Strided;
6172+ Cost = StridedLoadStoreCost;
6173+ }
6174+ }
61636175 setWideningDecision (&I, VF, Decision, Cost);
61646176 continue ;
61656177 }
@@ -6806,6 +6818,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
68066818 return TTI::CastContextHint::Normal;
68076819
68086820 switch (getWideningDecision (I, VF)) {
6821+ // TODO: New CastContextHint for strided accesses.
6822+ case LoopVectorizationCostModel::CM_Strided:
68096823 case LoopVectorizationCostModel::CM_GatherScatter:
68106824 return TTI::CastContextHint::GatherScatter;
68116825 case LoopVectorizationCostModel::CM_Interleave:
@@ -8356,6 +8370,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
83568370 bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
83578371 bool Consecutive =
83588372 Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
8373+ bool Strided = Decision == LoopVectorizationCostModel::CM_Strided;
83598374
83608375 VPValue *Ptr = isa<LoadInst>(I) ? Operands[0 ] : Operands[1 ];
83618376 if (Consecutive) {
@@ -8382,12 +8397,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
83828397 Ptr = VectorPtr;
83838398 }
83848399 if (LoadInst *Load = dyn_cast<LoadInst>(I))
8385- return new VPWidenLoadRecipe (*Load, Ptr, Mask, Consecutive, Reverse, false ,
8386- I->getDebugLoc ());
8400+ return new VPWidenLoadRecipe (*Load, Ptr, Mask, Consecutive, Reverse,
8401+ Strided, I->getDebugLoc ());
83878402
83888403 StoreInst *Store = cast<StoreInst>(I);
83898404 return new VPWidenStoreRecipe (*Store, Ptr, Operands[0 ], Mask, Consecutive,
8390- Reverse, false , I->getDebugLoc ());
8405+ Reverse, Strided , I->getDebugLoc ());
83918406}
83928407
83938408/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
0 commit comments