@@ -1240,9 +1240,10 @@ class LoopVectorizationCostModel {
   getDivRemSpeculationCost(Instruction *I,
                            ElementCount VF) const;
 
-  /// Returns true if \p I is a memory instruction with consecutive memory
-  /// access that can be widened.
-  bool memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
+  /// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
+  /// memory instruction with consecutive access that can be widened, or
+  /// CM_Unknown otherwise.
+  InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
 
   /// Returns true if \p I is a memory instruction in an interleaved-group
   /// of memory accesses that can be vectorized with wide vector loads/stores
@@ -1509,7 +1510,8 @@ class LoopVectorizationCostModel {
 
   /// The cost computation for widening instruction \p I with consecutive
   /// memory access.
-  InstructionCost getConsecutiveMemOpCost(Instruction *I, ElementCount VF);
+  InstructionCost getConsecutiveMemOpCost(Instruction *I, ElementCount VF,
+                                          InstWidening Decision);
 
   /// The cost calculation for Load/Store instruction \p I with uniform pointer -
   /// Load: scalar load + broadcast.
@@ -2988,30 +2990,32 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
                          : TTI.isLegalMaskedStore(Ty, Alignment, AS);
 }
 
-bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
-    Instruction *I, ElementCount VF) {
+LoopVectorizationCostModel::InstWidening
+LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
+                                                          ElementCount VF) {
   // Get and ensure we have a valid memory instruction.
   assert((isa<LoadInst, StoreInst>(I)) && "Invalid memory instruction");
 
   auto *Ptr = getLoadStorePointerOperand(I);
   auto *ScalarTy = getLoadStoreType(I);
 
   // In order to be widened, the pointer should be consecutive, first of all.
-  if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
-    return false;
+  auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
+  if (!Stride)
+    return CM_Unknown;
 
   // If the instruction is a store located in a predicated block, it will be
   // scalarized.
   if (isScalarWithPredication(I, VF))
-    return false;
+    return CM_Unknown;
 
   // If the instruction's allocated size doesn't equal its type size, it
   // requires padding and will be scalarized.
   auto &DL = I->getDataLayout();
   if (hasIrregularType(ScalarTy, DL))
-    return false;
+    return CM_Unknown;
 
-  return true;
+  return Stride == 1 ? CM_Widen : CM_Widen_Reverse;
 }
 
 void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
@@ -5183,17 +5187,15 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   return Cost;
 }
 
-InstructionCost
-LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
-                                                    ElementCount VF) {
+InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
+    Instruction *I, ElementCount VF, InstWidening Decision) {
   Type *ValTy = getLoadStoreType(I);
   auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
-  Value *Ptr = getLoadStorePointerOperand(I);
   unsigned AS = getLoadStoreAddressSpace(I);
-  int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
+  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
-  assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
-         "Stride should be 1 or -1 for consecutive memory access");
+  assert((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
+         "Expected widen decision.");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
   if (Legal->isMaskRequired(I)) {
@@ -5205,8 +5207,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
                          CostKind, OpInfo, I);
   }
 
-  bool Reverse = ConsecutiveStride < 0;
-  if (Reverse)
+  if (Decision == CM_Widen_Reverse)
     Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
                                VectorTy, {}, CostKind, 0);
   return Cost;
@@ -5617,14 +5618,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
       }
 
       // We assume that widening is the best solution when possible.
-      if (memoryInstructionCanBeWidened(&I, VF)) {
-        InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
-        int ConsecutiveStride = Legal->isConsecutivePtr(
-            getLoadStoreType(&I), getLoadStorePointerOperand(&I));
-        assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
-               "Expected consecutive stride.");
-        InstWidening Decision =
-            ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
+      if (auto Decision = memoryInstructionCanBeWidened(&I, VF)) {
+        InstructionCost Cost = getConsecutiveMemOpCost(&I, VF, Decision);
         setWideningDecision(&I, VF, Decision, Cost);
         continue;
      }
0 commit comments