Skip to content

Commit 7692d10

Browse files
committed
[SLP][NFC] Remove dead code + use n log n lookups instead of n^2
1 parent d0d0c4d commit 7692d10

File tree

1 file changed

+57
-68
lines changed

1 file changed

+57
-68
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 57 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -6525,48 +6525,38 @@ static void gatherPossiblyVectorizableLoads(
65256525
Type *ScalarTy = getValueType(VL.front());
65266526
if (!isValidElementType(ScalarTy))
65276527
return;
6528-
const int NumScalars = VL.size();
6529-
int NumParts = 1;
6530-
if (NumScalars > 1) {
6531-
auto *VecTy = getWidenedType(ScalarTy, NumScalars);
6532-
NumParts = TTI.getNumberOfParts(VecTy);
6533-
if (NumParts == 0 || NumParts >= NumScalars ||
6534-
VecTy->getNumElements() % NumParts != 0 ||
6535-
!hasFullVectorsOrPowerOf2(TTI, VecTy->getElementType(),
6536-
VecTy->getNumElements() / NumParts))
6537-
NumParts = 1;
6538-
}
6539-
unsigned VF = PowerOf2Ceil(NumScalars / NumParts);
65406528
SmallVector<SmallVector<std::pair<LoadInst *, int>>> ClusteredLoads;
6541-
for (int I : seq<int>(NumParts)) {
6542-
for (Value *V :
6543-
VL.slice(I * VF, std::min<unsigned>(VF, VL.size() - I * VF))) {
6544-
auto *LI = dyn_cast<LoadInst>(V);
6545-
if (!LI)
6529+
SmallVector<DenseMap<int, LoadInst *>> ClusteredDistToLoad;
6530+
for (Value *V : VL) {
6531+
auto *LI = dyn_cast<LoadInst>(V);
6532+
if (!LI)
6533+
continue;
6534+
if (R.isDeleted(LI) || R.isVectorized(LI) || !LI->isSimple())
6535+
continue;
6536+
bool IsFound = false;
6537+
for (auto [Map, Data] : zip(ClusteredDistToLoad, ClusteredLoads)) {
6538+
if (LI->getParent() != Data.front().first->getParent() ||
6539+
LI->getType() != Data.front().first->getType())
65466540
continue;
6547-
if (R.isDeleted(LI) || R.isVectorized(LI) || !LI->isSimple())
6541+
std::optional<int> Dist = getPointersDiff(
6542+
LI->getType(), LI->getPointerOperand(), Data.front().first->getType(),
6543+
Data.front().first->getPointerOperand(), DL, SE,
6544+
/*StrictCheck=*/true);
6545+
if (!Dist)
65486546
continue;
6549-
bool IsFound = false;
6550-
for (auto &Data : ClusteredLoads) {
6551-
if (LI->getParent() != Data.front().first->getParent())
6552-
continue;
6553-
std::optional<int> Dist =
6554-
getPointersDiff(LI->getType(), LI->getPointerOperand(),
6555-
Data.front().first->getType(),
6556-
Data.front().first->getPointerOperand(), DL, SE,
6557-
/*StrictCheck=*/true);
6558-
if (Dist && all_of(Data, [&](const std::pair<LoadInst *, int> &Pair) {
6559-
IsFound |= Pair.first == LI;
6560-
return IsFound || Pair.second != *Dist;
6561-
})) {
6562-
if (!IsFound)
6563-
Data.emplace_back(LI, *Dist);
6564-
IsFound = true;
6565-
break;
6566-
}
6547+
auto It = Map.find(*Dist);
6548+
if (It != Map.end() && It->second != LI)
6549+
continue;
6550+
if (It == Map.end()) {
6551+
Data.emplace_back(LI, *Dist);
6552+
Map.try_emplace(*Dist, LI);
65676553
}
6568-
if (!IsFound)
6569-
ClusteredLoads.emplace_back().emplace_back(LI, 0);
6554+
IsFound = true;
6555+
break;
6556+
}
6557+
if (!IsFound) {
6558+
ClusteredLoads.emplace_back().emplace_back(LI, 0);
6559+
ClusteredDistToLoad.emplace_back().try_emplace(0, LI);
65706560
}
65716561
}
65726562
auto FindMatchingLoads =
@@ -6591,38 +6581,37 @@ static void gatherPossiblyVectorizableLoads(
65916581
Data.front().first->getType(),
65926582
Data.front().first->getPointerOperand(), DL, SE,
65936583
/*StrictCheck=*/true);
6594-
if (Dist) {
6595-
// Found matching gathered loads - check if all loads are unique or
6596-
// can be effectively vectorized.
6597-
unsigned NumUniques = 0;
6598-
for (auto [Cnt, Pair] : enumerate(Loads)) {
6599-
bool Used = any_of(
6600-
Data, [&, &P = Pair](const std::pair<LoadInst *, int> &PD) {
6601-
return PD.first == P.first;
6602-
});
6603-
if (!Used &&
6604-
none_of(Data,
6605-
[&, &P = Pair](const std::pair<LoadInst *, int> &PD) {
6606-
return *Dist + P.second == PD.second;
6607-
})) {
6608-
++NumUniques;
6609-
ToAdd.insert(Cnt);
6610-
} else if (Used) {
6611-
Repeated.insert(Cnt);
6612-
}
6613-
}
6614-
if (NumUniques > 0 &&
6615-
(Loads.size() == NumUniques ||
6616-
(Loads.size() - NumUniques >= 2 &&
6617-
Loads.size() - NumUniques >= Loads.size() / 2 &&
6618-
(has_single_bit(Data.size() + NumUniques) ||
6619-
bit_ceil(Data.size()) <
6620-
bit_ceil(Data.size() + NumUniques))))) {
6621-
Offset = *Dist;
6622-
Start = Idx + 1;
6623-
return std::next(GatheredLoads.begin(), Idx);
6584+
if (!Dist)
6585+
continue;
6586+
SmallSet<int, 4> DataDists;
6587+
SmallPtrSet<LoadInst *, 4> DataLoads;
6588+
for (std::pair<LoadInst *, int> P : Data) {
6589+
DataDists.insert(P.second);
6590+
DataLoads.insert(P.first);
6591+
}
6592+
// Found matching gathered loads - check if all loads are unique or
6593+
// can be effectively vectorized.
6594+
unsigned NumUniques = 0;
6595+
for (auto [Cnt, Pair] : enumerate(Loads)) {
6596+
bool Used = DataLoads.contains(Pair.first);
6597+
if (!Used && !DataDists.contains(*Dist + Pair.second)) {
6598+
++NumUniques;
6599+
ToAdd.insert(Cnt);
6600+
} else if (Used) {
6601+
Repeated.insert(Cnt);
66246602
}
66256603
}
6604+
if (NumUniques > 0 &&
6605+
(Loads.size() == NumUniques ||
6606+
(Loads.size() - NumUniques >= 2 &&
6607+
Loads.size() - NumUniques >= Loads.size() / 2 &&
6608+
(has_single_bit(Data.size() + NumUniques) ||
6609+
bit_ceil(Data.size()) <
6610+
bit_ceil(Data.size() + NumUniques))))) {
6611+
Offset = *Dist;
6612+
Start = Idx + 1;
6613+
return std::next(GatheredLoads.begin(), Idx);
6614+
}
66266615
}
66276616
ToAdd.clear();
66286617
return GatheredLoads.end();

0 commit comments

Comments
 (0)