@@ -1240,9 +1240,10 @@ class LoopVectorizationCostModel {
12401240 getDivRemSpeculationCost (Instruction *I,
12411241 ElementCount VF) const ;
12421242
1243- /// Returns true if \p I is a memory instruction with consecutive memory
1244- /// access that can be widened.
1245- bool memoryInstructionCanBeWidened (Instruction *I, ElementCount VF);
1243+ /// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
1244+ /// memory instruction with consecutive access that can be widened, or
1245+ /// CM_Unknown otherwise.
1246+ InstWidening memoryInstructionCanBeWidened (Instruction *I, ElementCount VF);
12461247
12471248 /// Returns true if \p I is a memory instruction in an interleaved-group
12481249 /// of memory accesses that can be vectorized with wide vector loads/stores
@@ -1509,7 +1510,8 @@ class LoopVectorizationCostModel {
15091510
15101511 /// The cost computation for widening instruction \p I with consecutive
15111512 /// memory access.
1512- InstructionCost getConsecutiveMemOpCost (Instruction *I, ElementCount VF);
1513+ InstructionCost getConsecutiveMemOpCost (Instruction *I, ElementCount VF,
1514+ InstWidening Decision);
15131515
15141516 /// The cost calculation for Load/Store instruction \p I with uniform pointer -
15151517 /// Load: scalar load + broadcast.
@@ -2988,30 +2990,33 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
29882990 : TTI.isLegalMaskedStore (Ty, Alignment, AS);
29892991}
29902992
2991- bool LoopVectorizationCostModel::memoryInstructionCanBeWidened (
2992- Instruction *I, ElementCount VF) {
2993+ LoopVectorizationCostModel::InstWidening
2994+ LoopVectorizationCostModel::memoryInstructionCanBeWidened (Instruction *I,
2995+ ElementCount VF) {
29932996 // Get and ensure we have a valid memory instruction.
29942997 assert ((isa<LoadInst, StoreInst>(I)) && " Invalid memory instruction" );
29952998
29962999 auto *Ptr = getLoadStorePointerOperand (I);
29973000 auto *ScalarTy = getLoadStoreType (I);
29983001
29993002 // In order to be widened, the pointer should be consecutive, first of all.
3000- if (!Legal->isConsecutivePtr (ScalarTy, Ptr))
3001- return false ;
3003+ auto Stride = Legal->isConsecutivePtr (ScalarTy, Ptr);
3004+ if (!Stride)
3005+ return CM_Unknown;
3006+ assert ((Stride == 1 || Stride == -1 ) && " Expected consecutive stride." );
30023007
30033008 // If the instruction is a store located in a predicated block, it will be
30043009 // scalarized.
30053010 if (isScalarWithPredication (I, VF))
3006- return false ;
3011+ return CM_Unknown ;
30073012
30083013 // If the instruction's allocated size doesn't equal it's type size, it
30093014 // requires padding and will be scalarized.
30103015 auto &DL = I->getDataLayout ();
30113016 if (hasIrregularType (ScalarTy, DL))
3012- return false ;
3017+ return CM_Unknown ;
30133018
3014- return true ;
3019+ return Stride == 1 ? CM_Widen : CM_Widen_Reverse ;
30153020}
30163021
30173022void LoopVectorizationCostModel::collectLoopUniforms (ElementCount VF) {
@@ -5183,17 +5188,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
51835188 return Cost;
51845189}
51855190
5186- InstructionCost
5187- LoopVectorizationCostModel::getConsecutiveMemOpCost (Instruction *I,
5188- ElementCount VF) {
5191+ InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost (
5192+ Instruction *I, ElementCount VF, InstWidening Decision) {
51895193 Type *ValTy = getLoadStoreType (I);
51905194 auto *VectorTy = cast<VectorType>(toVectorTy (ValTy, VF));
5191- Value *Ptr = getLoadStorePointerOperand (I);
51925195 unsigned AS = getLoadStoreAddressSpace (I);
5193- int ConsecutiveStride = Legal->isConsecutivePtr (ValTy, Ptr);
51945196
5195- assert ((ConsecutiveStride == 1 || ConsecutiveStride == - 1 ) &&
5196- " Stride should be 1 or -1 for consecutive memory access " );
5197+ assert ((Decision == CM_Widen || Decision == CM_Widen_Reverse ) &&
5198+ " Expected widen decision. " );
51975199 const Align Alignment = getLoadStoreAlignment (I);
51985200 InstructionCost Cost = 0 ;
51995201 if (Legal->isMaskRequired (I)) {
@@ -5205,8 +5207,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
52055207 CostKind, OpInfo, I);
52065208 }
52075209
5208- bool Reverse = ConsecutiveStride < 0 ;
5209- if (Reverse)
5210+ if (Decision == CM_Widen_Reverse)
52105211 Cost += TTI.getShuffleCost (TargetTransformInfo::SK_Reverse, VectorTy,
52115212 VectorTy, {}, CostKind, 0 );
52125213 return Cost;
@@ -5617,14 +5618,9 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
56175618 }
56185619
56195620 // We assume that widening is the best solution when possible.
5620- if (memoryInstructionCanBeWidened (&I, VF)) {
5621- InstructionCost Cost = getConsecutiveMemOpCost (&I, VF);
5622- int ConsecutiveStride = Legal->isConsecutivePtr (
5623- getLoadStoreType (&I), getLoadStorePointerOperand (&I));
5624- assert ((ConsecutiveStride == 1 || ConsecutiveStride == -1 ) &&
5625- " Expected consecutive stride." );
5626- InstWidening Decision =
5627- ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
5621+ if (auto Decision = memoryInstructionCanBeWidened (&I, VF);
5622+ Decision != CM_Unknown) {
5623+ InstructionCost Cost = getConsecutiveMemOpCost (&I, VF, Decision);
56285624 setWideningDecision (&I, VF, Decision, Cost);
56295625 continue ;
56305626 }