@@ -6525,48 +6525,38 @@ static void gatherPossiblyVectorizableLoads(
65256525 Type *ScalarTy = getValueType(VL.front());
65266526 if (!isValidElementType(ScalarTy))
65276527 return;
6528- const int NumScalars = VL.size();
6529- int NumParts = 1;
6530- if (NumScalars > 1) {
6531- auto *VecTy = getWidenedType(ScalarTy, NumScalars);
6532- NumParts = TTI.getNumberOfParts(VecTy);
6533- if (NumParts == 0 || NumParts >= NumScalars ||
6534- VecTy->getNumElements() % NumParts != 0 ||
6535- !hasFullVectorsOrPowerOf2(TTI, VecTy->getElementType(),
6536- VecTy->getNumElements() / NumParts))
6537- NumParts = 1;
6538- }
6539- unsigned VF = PowerOf2Ceil(NumScalars / NumParts);
65406528 SmallVector<SmallVector<std::pair<LoadInst *, int>>> ClusteredLoads;
6541- for (int I : seq<int>(NumParts)) {
6542- for (Value *V :
6543- VL.slice(I * VF, std::min<unsigned>(VF, VL.size() - I * VF))) {
6544- auto *LI = dyn_cast<LoadInst>(V);
6545- if (!LI)
6529+ SmallVector<DenseMap<int, LoadInst *>> ClusteredDistToLoad;
6530+ for (Value *V : VL) {
6531+ auto *LI = dyn_cast<LoadInst>(V);
6532+ if (!LI)
6533+ continue;
6534+ if (R.isDeleted(LI) || R.isVectorized(LI) || !LI->isSimple())
6535+ continue;
6536+ bool IsFound = false;
6537+ for (auto [Map, Data] : zip(ClusteredDistToLoad, ClusteredLoads)) {
6538+ if (LI->getParent() != Data.front().first->getParent() ||
6539+ LI->getType() != Data.front().first->getType())
65466540 continue;
6547- if (R.isDeleted(LI) || R.isVectorized(LI) || !LI->isSimple())
6541+ std::optional<int> Dist = getPointersDiff(
6542+ LI->getType(), LI->getPointerOperand(), Data.front().first->getType(),
6543+ Data.front().first->getPointerOperand(), DL, SE,
6544+ /*StrictCheck=*/true);
6545+ if (!Dist)
65486546 continue;
6549- bool IsFound = false;
6550- for (auto &Data : ClusteredLoads) {
6551- if (LI->getParent() != Data.front().first->getParent())
6552- continue;
6553- std::optional<int> Dist =
6554- getPointersDiff(LI->getType(), LI->getPointerOperand(),
6555- Data.front().first->getType(),
6556- Data.front().first->getPointerOperand(), DL, SE,
6557- /*StrictCheck=*/true);
6558- if (Dist && all_of(Data, [&](const std::pair<LoadInst *, int> &Pair) {
6559- IsFound |= Pair.first == LI;
6560- return IsFound || Pair.second != *Dist;
6561- })) {
6562- if (!IsFound)
6563- Data.emplace_back(LI, *Dist);
6564- IsFound = true;
6565- break;
6566- }
6547+ auto It = Map.find(*Dist);
6548+ if (It != Map.end() && It->second != LI)
6549+ continue;
6550+ if (It == Map.end()) {
6551+ Data.emplace_back(LI, *Dist);
6552+ Map.try_emplace(*Dist, LI);
65676553 }
6568- if (!IsFound)
6569- ClusteredLoads.emplace_back().emplace_back(LI, 0);
6554+ IsFound = true;
6555+ break;
6556+ }
6557+ if (!IsFound) {
6558+ ClusteredLoads.emplace_back().emplace_back(LI, 0);
6559+ ClusteredDistToLoad.emplace_back().try_emplace(0, LI);
65706560 }
65716561 }
65726562 auto FindMatchingLoads =
@@ -6591,38 +6581,37 @@ static void gatherPossiblyVectorizableLoads(
65916581 Data.front().first->getType(),
65926582 Data.front().first->getPointerOperand(), DL, SE,
65936583 /*StrictCheck=*/true);
6594- if (Dist) {
6595- // Found matching gathered loads - check if all loads are unique or
6596- // can be effectively vectorized.
6597- unsigned NumUniques = 0;
6598- for (auto [Cnt, Pair] : enumerate(Loads)) {
6599- bool Used = any_of(
6600- Data, [&, &P = Pair](const std::pair<LoadInst *, int> &PD) {
6601- return PD.first == P.first;
6602- });
6603- if (!Used &&
6604- none_of(Data,
6605- [&, &P = Pair](const std::pair<LoadInst *, int> &PD) {
6606- return *Dist + P.second == PD.second;
6607- })) {
6608- ++NumUniques;
6609- ToAdd.insert(Cnt);
6610- } else if (Used) {
6611- Repeated.insert(Cnt);
6612- }
6613- }
6614- if (NumUniques > 0 &&
6615- (Loads.size() == NumUniques ||
6616- (Loads.size() - NumUniques >= 2 &&
6617- Loads.size() - NumUniques >= Loads.size() / 2 &&
6618- (has_single_bit(Data.size() + NumUniques) ||
6619- bit_ceil(Data.size()) <
6620- bit_ceil(Data.size() + NumUniques))))) {
6621- Offset = *Dist;
6622- Start = Idx + 1;
6623- return std::next(GatheredLoads.begin(), Idx);
6584+ if (!Dist)
6585+ continue;
6586+ SmallSet<int, 4> DataDists;
6587+ SmallPtrSet<LoadInst *, 4> DataLoads;
6588+ for (std::pair<LoadInst *, int> P : Data) {
6589+ DataDists.insert(P.second);
6590+ DataLoads.insert(P.first);
6591+ }
6592+ // Found matching gathered loads - check if all loads are unique or
6593+ // can be effectively vectorized.
6594+ unsigned NumUniques = 0;
6595+ for (auto [Cnt, Pair] : enumerate(Loads)) {
6596+ bool Used = DataLoads.contains(Pair.first);
6597+ if (!Used && !DataDists.contains(*Dist + Pair.second)) {
6598+ ++NumUniques;
6599+ ToAdd.insert(Cnt);
6600+ } else if (Used) {
6601+ Repeated.insert(Cnt);
66246602 }
66256603 }
6604+ if (NumUniques > 0 &&
6605+ (Loads.size() == NumUniques ||
6606+ (Loads.size() - NumUniques >= 2 &&
6607+ Loads.size() - NumUniques >= Loads.size() / 2 &&
6608+ (has_single_bit(Data.size() + NumUniques) ||
6609+ bit_ceil(Data.size()) <
6610+ bit_ceil(Data.size() + NumUniques))))) {
6611+ Offset = *Dist;
6612+ Start = Idx + 1;
6613+ return std::next(GatheredLoads.begin(), Idx);
6614+ }
66266615 }
66276616 ToAdd.clear();
66286617 return GatheredLoads.end();
0 commit comments