@@ -4757,13 +4757,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                  });
        });
     const unsigned AbsoluteDiff = std::abs(*Diff);
-    if (IsPossibleStrided &&
-        (IsAnyPointerUsedOutGraph ||
-         ((Sz > MinProfitableStridedLoads ||
-           (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
-            has_single_bit(AbsoluteDiff))) &&
-          AbsoluteDiff > Sz) ||
-         *Diff == -(static_cast<int>(Sz) - 1))) {
+    if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
+                              ((Sz > MinProfitableStridedLoads ||
+                                (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
+                                 has_single_bit(AbsoluteDiff))) &&
+                               AbsoluteDiff > Sz) ||
+                              *Diff == -(static_cast<int>(Sz) - 1))) {
       int Stride = *Diff / static_cast<int>(Sz - 1);
       if (*Diff == Stride * static_cast<int>(Sz - 1)) {
         Align Alignment =
@@ -4778,8 +4777,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
             if (Ptr == PtrN)
               Dist = *Diff;
             else if (Ptr != Ptr0)
-              Dist =
-                  *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
+              Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
             // If the strides are not the same or repeated, we can't
             // vectorize.
             if (((Dist / Stride) * Stride) != Dist ||
@@ -4822,14 +4820,14 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
       if (VectorizedCnt == VL.size() / VF) {
         // Compare masked gather cost and loads + insersubvector costs.
         TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-        auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
-            TTI, PointerOps, PointerOps.front(), Instruction::GetElementPtr,
-            CostKind, ScalarTy, VecTy);
+        auto [ScalarGEPCost, VectorGEPCost] =
+            getGEPCosts(TTI, PointerOps, PointerOps.front(),
+                        Instruction::GetElementPtr, CostKind, ScalarTy, VecTy);
         InstructionCost MaskedGatherCost =
-            TTI.getGatherScatterOpCost(
-                Instruction::Load, VecTy,
-                cast<LoadInst>(VL0)->getPointerOperand(),
-                /*VariableMask=*/false, CommonAlignment, CostKind) +
+            TTI.getGatherScatterOpCost(Instruction::Load, VecTy,
+                                       cast<LoadInst>(VL0)->getPointerOperand(),
+                                       /*VariableMask=*/false, CommonAlignment,
+                                       CostKind) +
             VectorGEPCost - ScalarGEPCost;
         InstructionCost VecLdCost = 0;
         auto *SubVecTy = getWidenedType(ScalarTy, VF);
@@ -4853,23 +4851,23 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                 getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF),
                             LI0->getPointerOperand(), Instruction::Load,
                             CostKind, ScalarTy, SubVecTy);
-            VecLdCost +=
-                TTI.getStridedMemoryOpCost(
-                    Instruction::Load, SubVecTy, LI0->getPointerOperand(),
-                    /*VariableMask=*/false, CommonAlignment, CostKind) +
-                VectorGEPCost - ScalarGEPCost;
+            VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
+                                                    LI0->getPointerOperand(),
+                                                    /*VariableMask=*/false,
+                                                    CommonAlignment, CostKind) +
+                         VectorGEPCost - ScalarGEPCost;
             break;
           }
           case LoadsState::ScatterVectorize: {
             auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
                 TTI, ArrayRef(PointerOps).slice(I * VF, VF),
-                LI0->getPointerOperand(), Instruction::GetElementPtr,
-                CostKind, ScalarTy, SubVecTy);
-            VecLdCost +=
-                TTI.getGatherScatterOpCost(
-                    Instruction::Load, SubVecTy, LI0->getPointerOperand(),
-                    /*VariableMask=*/false, CommonAlignment, CostKind) +
-                VectorGEPCost - ScalarGEPCost;
+                LI0->getPointerOperand(), Instruction::GetElementPtr, CostKind,
+                ScalarTy, SubVecTy);
+            VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy,
+                                                    LI0->getPointerOperand(),
+                                                    /*VariableMask=*/false,
+                                                    CommonAlignment, CostKind) +
+                         VectorGEPCost - ScalarGEPCost;
             break;
           }
           case LoadsState::Gather:
@@ -4880,8 +4878,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
           for (int Idx : seq<int>(0, VL.size()))
             ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
           VecLdCost +=
-              ::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy,
-                               ShuffleMask, CostKind, I * VF, SubVecTy);
+              ::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy, ShuffleMask,
+                               CostKind, I * VF, SubVecTy);
         }
         // If masked gather cost is higher - better to vectorize, so
         // consider it as a gather node. It will be better estimated
@@ -4897,10 +4895,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
   // increases the cost.
   Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
   bool ProfitableGatherPointers =
-      L && Sz > 2 &&
-      static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
-        return L->isLoopInvariant(V);
-      })) <= Sz / 2;
+      L && Sz > 2 && static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
+        return L->isLoopInvariant(V);
+      })) <= Sz / 2;
   if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) {
         auto *GEP = dyn_cast<GetElementPtrInst>(P);
         return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) ||