@@ -6529,7 +6529,8 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
 /// Otherwise, SCEV* of the stride value is returned.
 static const SCEV *calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
                                      const DataLayout &DL, ScalarEvolution &SE,
-                                     SmallVectorImpl<unsigned> &SortedIndices) {
+                                     SmallVectorImpl<unsigned> &SortedIndices,
+                                     SmallVectorImpl<int64_t> &Coeffs) {
   SmallVector<const SCEV *> SCEVs;
   const SCEV *PtrSCEVLowest = nullptr;
   const SCEV *PtrSCEVHighest = nullptr;
@@ -6604,12 +6605,14 @@ static const SCEV *calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
       const auto *SC = dyn_cast<SCEVConstant>(Coeff);
       if (!SC || isa<SCEVCouldNotCompute>(SC))
         return nullptr;
+      Coeffs.push_back((int64_t)SC->getAPInt().getLimitedValue());
       if (!SE.getMinusSCEV(PtrSCEV, SE.getAddExpr(PtrSCEVLowest,
                                                   SE.getMulExpr(Stride, SC)))
                ->isZero())
         return nullptr;
       Dist = SC->getAPInt().getZExtValue();
-    }
+    } else
+      Coeffs.push_back(0);
     // If the strides are not the same or repeated, we can't vectorize.
     if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
       return nullptr;
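
The hunk above threads a new `Coeffs` out-parameter through `calculateRtStride`: for each pointer it records the constant multiplier of the runtime stride relative to the lowest pointer, and 0 for the lowest pointer itself. The sketch below is a minimal illustration of that bookkeeping on plain integer addresses; `collectCoeffs` and the arithmetic model are hypothetical stand-ins for the SCEV-based logic in the patch, not the SLPVectorizer API.

#include <cstdint>
#include <vector>

// Toy model (assumed): addresses of the form Lowest + C * Stride. Collect the
// coefficient C for each address (0 for the lowest address itself) and fail
// if some address is not an exact stride multiple away from the lowest one.
static bool collectCoeffs(const std::vector<int64_t> &Addrs, int64_t Lowest,
                          int64_t Stride, std::vector<int64_t> &Coeffs) {
  for (int64_t A : Addrs) {
    if (A == Lowest) {
      Coeffs.push_back(0);
      continue;
    }
    int64_t Diff = A - Lowest;
    if (Stride == 0 || Diff % Stride != 0)
      return false;
    Coeffs.push_back(Diff / Stride);
  }
  return true;
}
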
@@ -7105,18 +7108,134 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
                                        Type *ScalarTy, Align CommonAlignment,
                                        SmallVectorImpl<unsigned> &SortedIndices,
                                        StridedPtrInfo &SPtrInfo) const {
+  // If each value in `PointerOps` is of the form `%x + Offset`, where
+  // `Offset` is a constant, group the values from `PointerOps` and their
+  // indices in `PointerOps` by that offset.
+  SmallDenseMap<int64_t, std::pair<SmallVector<Value *>, SmallVector<unsigned>>>
+      OffsetToPointerOpIdxMap;
+  for (auto [Idx, Ptr] : enumerate(PointerOps)) {
+    const SCEV *PtrSCEV = SE->getSCEV(Ptr);
+    if (!PtrSCEV)
+      return false;
+
+    const auto *Add = dyn_cast<SCEVAddExpr>(PtrSCEV);
+    int64_t Offset = 0;
+    if (Add) {
+      for (int I : seq<int>(Add->getNumOperands())) {
+        const auto *SC = dyn_cast<SCEVConstant>(Add->getOperand(I));
+        if (!SC)
+          continue;
+        Offset = SC->getAPInt().getSExtValue();
+        break;
+      }
+    }
+    OffsetToPointerOpIdxMap[Offset].first.push_back(Ptr);
+    OffsetToPointerOpIdxMap[Offset].second.push_back(Idx);
+  }
+  int NumOffsets = OffsetToPointerOpIdxMap.size();
+
   const unsigned Sz = PointerOps.size();
-  FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz);
-  if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) ||
-      !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+  unsigned VecSz = Sz;
+  Type *NewScalarTy = ScalarTy;
+  if (NumOffsets > 1) {
+    if (Sz % NumOffsets != 0)
+      return false;
+    VecSz = Sz / NumOffsets;
+    NewScalarTy = Type::getIntNTy(SE->getContext(),
+                                  DL->getTypeSizeInBits(ScalarTy).getFixedValue() *
+                                      NumOffsets);
+  }
+  FixedVectorType *StridedLoadTy = getWidenedType(NewScalarTy, VecSz);
+  if (!(Sz > MinProfitableStridedLoads && TTI->isTypeLegal(StridedLoadTy) &&
+        TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment)))
     return false;
-  if (const SCEV *Stride =
-          calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) {
-    SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
-    SPtrInfo.StrideSCEV = Stride;
-    return true;
+
+  // Check if the offsets are contiguous.
+  SmallVector<int64_t> SortedOffsetsV;
+  for (auto [K, _] : OffsetToPointerOpIdxMap)
+    SortedOffsetsV.push_back(K);
+  sort(SortedOffsetsV);
+  if (NumOffsets > 1) {
+    int64_t CommonDiff = SortedOffsetsV[1] - SortedOffsetsV[0];
+    if (CommonDiff != 1)
+      return false;
+    for (int I : seq<int>(1, SortedOffsetsV.size() - 1)) {
+      if (SortedOffsetsV[I + 1] - SortedOffsetsV[I] != CommonDiff)
+        return false;
+    }
   }
-  return false;
+
+  // For the set of pointers with the same offset, check that the distance
+  // between adjacent pointers is always the same value (the stride). As we do
+  // that, also calculate SortedIndices. Since we should not modify
+  // `SortedIndices` unless we know that all the checks succeed, record the
+  // indices into `SortedIndicesDraft`.
+  int64_t LowestOffset = SortedOffsetsV[0];
+  SmallVector<Value *> &PointerOps0 =
+      OffsetToPointerOpIdxMap[LowestOffset].first;
+  SmallVector<unsigned> &IndicesInAllPointerOps0 =
+      OffsetToPointerOpIdxMap[LowestOffset].second;
+
+  SmallVector<int64_t> Coeffs0;
+  SmallVector<unsigned> SortedIndicesForOffset0;
+  const SCEV *Stride0 = calculateRtStride(PointerOps0, ScalarTy, *DL, *SE,
+                                          SortedIndicesForOffset0, Coeffs0);
+  if (!Stride0)
+    return false;
+  unsigned NumCoeffs0 = Coeffs0.size();
+  if (NumCoeffs0 * NumOffsets != Sz)
+    return false;
+  sort(Coeffs0);
+
+  SmallVector<unsigned> SortedIndicesDraft;
+  SortedIndicesDraft.resize(Sz);
+  auto UpdateSortedIndices =
+      [&](SmallVectorImpl<unsigned> &SortedIndicesForOffset,
+          const SmallVectorImpl<unsigned> &IndicesInAllPointerOps,
+          const int64_t OffsetNum) {
+        if (SortedIndicesForOffset.empty()) {
+          SortedIndicesForOffset.resize(IndicesInAllPointerOps.size());
+          std::iota(SortedIndicesForOffset.begin(),
+                    SortedIndicesForOffset.end(), 0);
+        }
+        for (const auto [Num, Idx] : enumerate(SortedIndicesForOffset)) {
+          SortedIndicesDraft[Num * NumOffsets + OffsetNum] =
+              IndicesInAllPointerOps[Idx];
+        }
+      };
+
+  UpdateSortedIndices(SortedIndicesForOffset0, IndicesInAllPointerOps0, 0);
+
+  SmallVector<int64_t> Coeffs;
+  SmallVector<unsigned> SortedIndicesForOffset;
+  for (int I : seq<int>(1, NumOffsets)) {
+    Coeffs.clear();
+    SortedIndicesForOffset.clear();
+
+    int64_t Offset = SortedOffsetsV[I];
+    SmallVector<Value *> &PointerOpsForOffset =
+        OffsetToPointerOpIdxMap[Offset].first;
+    SmallVector<unsigned> &IndicesInAllPointerOps =
+        OffsetToPointerOpIdxMap[Offset].second;
+    const SCEV *StrideWithinGroup = calculateRtStride(
+        PointerOpsForOffset, ScalarTy, *DL, *SE, SortedIndicesForOffset, Coeffs);
+
+    if (!StrideWithinGroup || StrideWithinGroup != Stride0)
+      return false;
+    if (Coeffs.size() != NumCoeffs0)
+      return false;
+    sort(Coeffs);
+    if (Coeffs != Coeffs0)
+      return false;
+
+    UpdateSortedIndices(SortedIndicesForOffset, IndicesInAllPointerOps, I);
+  }
+
+  SortedIndices.clear();
+  SortedIndices = SortedIndicesDraft;
+  SPtrInfo.StrideSCEV = Stride0;
+  SPtrInfo.Ty = StridedLoadTy;
+  return true;
 }
 
 BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
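
Taken together, the `analyzeRtStrideCandidate` changes group the pointers by their constant offset, require the offsets to be contiguous, require every offset group to share the same stride and the same multiset of coefficients, and interleave the per-group orders so that element `Num` of group `OffsetNum` lands at position `Num * NumOffsets + OffsetNum` of `SortedIndices`, with the load widened to an integer type `NumOffsets` times wider than `ScalarTy`. The standalone sketch below replays that flow on plain integer addresses; `Addr` and `analyzeToyStrideCandidate` are illustrative names and this is a simplified model under assumed semantics, not the SLPVectorizer implementation.

#include <algorithm>
#include <cstdint>
#include <map>
#include <optional>
#include <vector>

// Toy address: a runtime part (Base, e.g. p + i * Stride) plus the constant
// offset folded out of the pointer expression.
struct Addr {
  int64_t Base;
  int64_t Offset;
};

// Returns the interleaved load order if the addresses form groups with
// contiguous offsets, one shared stride, and identical coefficient sets;
// otherwise returns std::nullopt.
static std::optional<std::vector<unsigned>>
analyzeToyStrideCandidate(const std::vector<Addr> &Ptrs, int64_t Stride) {
  if (Ptrs.empty() || Stride == 0)
    return std::nullopt;

  // Group element indices by constant offset; std::map keeps offsets sorted.
  std::map<int64_t, std::vector<unsigned>> Groups;
  for (unsigned I = 0; I < Ptrs.size(); ++I)
    Groups[Ptrs[I].Offset].push_back(I);
  const size_t NumOffsets = Groups.size();

  // Offsets must be contiguous: o, o + 1, o + 2, ...
  int64_t Expected = Groups.begin()->first;
  for (const auto &[Off, Idxs] : Groups)
    if (Off != Expected++)
      return std::nullopt;

  std::vector<int64_t> Coeffs0;
  std::vector<unsigned> Order(Ptrs.size());
  size_t GroupNum = 0;
  for (const auto &[Off, Idxs] : Groups) {
    // Every group must cover the same number of elements.
    if (Idxs.size() * NumOffsets != Ptrs.size())
      return std::nullopt;

    // Compute each element's coefficient: Base = Lowest + C * Stride.
    int64_t Lowest = Ptrs[Idxs.front()].Base;
    for (unsigned I : Idxs)
      Lowest = std::min(Lowest, Ptrs[I].Base);
    std::vector<int64_t> Coeffs;
    for (unsigned I : Idxs) {
      int64_t Diff = Ptrs[I].Base - Lowest;
      if (Diff % Stride != 0)
        return std::nullopt;
      Coeffs.push_back(Diff / Stride);
    }

    // All groups must share one multiset of coefficients.
    std::sort(Coeffs.begin(), Coeffs.end());
    if (GroupNum == 0)
      Coeffs0 = Coeffs;
    else if (Coeffs != Coeffs0)
      return std::nullopt;

    // Sort the group's elements by address and interleave: element Num of
    // group GroupNum lands at position Num * NumOffsets + GroupNum.
    std::vector<unsigned> Sorted(Idxs);
    std::sort(Sorted.begin(), Sorted.end(),
              [&](unsigned A, unsigned B) { return Ptrs[A].Base < Ptrs[B].Base; });
    for (size_t Num = 0; Num < Sorted.size(); ++Num)
      Order[Num * NumOffsets + GroupNum] = Sorted[Num];
    ++GroupNum;
  }
  return Order;
}

As a rough illustration, eight byte addresses of the form p + 16*i and p + 16*i + 1 (i = 0..3) split into two offset groups with coefficients {0, 1, 2, 3} each; the resulting order interleaves the two groups so each pair of adjacent bytes can be treated as one element of a wider (here 16-bit) strided load.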