Skip to content

Commit 7aa4aab

Browse files
committed
Fix formatting
Created using spr 1.3.5
1 parent c197b63 commit 7aa4aab

File tree

1 file changed

+165
-166
lines changed

1 file changed

+165
-166
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 165 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -3259,13 +3259,14 @@ class BoUpSLP {
32593259
#endif
32603260

32613261
/// Create a new VectorizableTree entry.
3262-
TreeEntry *
3263-
newTreeEntry(ArrayRef<Value *> VL, std::optional<ScheduleData *> Bundle,
3264-
const InstructionsState &S, const EdgeInfo &UserTreeIdx,
3265-
ArrayRef<int> ReuseShuffleIndices = std::nullopt,
3266-
ArrayRef<unsigned> ReorderIndices = std::nullopt,
3267-
unsigned InterleaveFactor = 0,
3268-
const DenseSet<const TreeEntry *> &Nodes = {}) {
3262+
TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
3263+
std::optional<ScheduleData *> Bundle,
3264+
const InstructionsState &S,
3265+
const EdgeInfo &UserTreeIdx,
3266+
ArrayRef<int> ReuseShuffleIndices = std::nullopt,
3267+
ArrayRef<unsigned> ReorderIndices = std::nullopt,
3268+
unsigned InterleaveFactor = 0,
3269+
const DenseSet<const TreeEntry *> &Nodes = {}) {
32693270
TreeEntry::EntryState EntryState =
32703271
Bundle ? ((InterleaveFactor > 0 && !Nodes.empty())
32713272
? TreeEntry::InterleavedVectorize
@@ -5532,11 +5533,12 @@ void BoUpSLP::reorderTopToBottom() {
55325533
// need to take into account their order when looking for the most used
55335534
// order.
55345535
if (TE->isAltShuffle()) {
5535-
VectorType *VecTy =
5536-
getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size());
5536+
VectorType *VecTy =
5537+
getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size());
55375538
unsigned Opcode0 = TE->getOpcode();
55385539
unsigned Opcode1 = TE->getAltOpcode();
5539-
SmallBitVector OpcodeMask(getAltInstrMask(TE->Scalars, Opcode0, Opcode1));
5540+
SmallBitVector OpcodeMask(
5541+
getAltInstrMask(TE->Scalars, Opcode0, Opcode1));
55405542
// If this pattern is supported by the target then we consider the
55415543
// order.
55425544
if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
@@ -6739,173 +6741,170 @@ void BoUpSLP::tryToVectorizeGatheredLoads() {
67396741
}
67406742
return Results;
67416743
};
6742-
auto ProcessGatheredLoads =
6743-
[&](ArrayRef<SmallVector<std::pair<LoadInst *, int>>> GatheredLoads) {
6744-
SmallVector<LoadInst *> NonVectorized;
6745-
for (ArrayRef<std::pair<LoadInst *, int>> LoadsDists : GatheredLoads) {
6746-
SmallVector<std::pair<LoadInst *, int>> LocalLoadsDists(LoadsDists);
6747-
SmallVector<LoadInst *> OriginalLoads(LocalLoadsDists.size());
6748-
transform(
6749-
LoadsDists, OriginalLoads.begin(),
6750-
[](const std::pair<LoadInst *, int> &L) { return L.first; });
6751-
stable_sort(LocalLoadsDists, LoadSorter);
6752-
SmallVector<LoadInst *> Loads;
6753-
for (const std::pair<LoadInst *, int> &L : LocalLoadsDists) {
6754-
if (!getTreeEntry(L.first))
6755-
Loads.push_back(L.first);
6756-
}
6757-
if (Loads.empty())
6758-
continue;
6759-
BoUpSLP::ValueSet VectorizedLoads;
6760-
SmallVector<LoadInst *> SortedNonVectorized;
6761-
SmallVector<std::pair<ArrayRef<Value *>, LoadsState>> Results =
6762-
GetVectorizedRanges(Loads, VectorizedLoads, SortedNonVectorized);
6763-
if (!Results.empty() && !SortedNonVectorized.empty() &&
6764-
all_of(Results,
6765-
[](const std::pair<ArrayRef<Value *>, LoadsState> &P) {
6766-
return P.second == LoadsState::ScatterVectorize;
6767-
})) {
6768-
VectorizedLoads.clear();
6769-
SmallVector<LoadInst *> UnsortedNonVectorized;
6770-
SmallVector<std::pair<ArrayRef<Value *>, LoadsState>>
6771-
UnsortedResults = GetVectorizedRanges(
6772-
OriginalLoads, VectorizedLoads, UnsortedNonVectorized);
6773-
if (SortedNonVectorized.size() >= UnsortedNonVectorized.size()) {
6774-
SortedNonVectorized.swap(UnsortedNonVectorized);
6775-
Results.swap(UnsortedResults);
6776-
}
6777-
}
6778-
for (auto [Slice, _] : Results) {
6779-
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize gathered loads ("
6780-
<< Slice.size() << ")\n");
6781-
if (any_of(Slice, [&](Value *V) { return getTreeEntry(V); })) {
6782-
for (Value *L : Slice)
6783-
if (!getTreeEntry(L))
6784-
SortedNonVectorized.push_back(cast<LoadInst>(L));
6785-
continue;
6786-
}
6744+
auto ProcessGatheredLoads = [&](ArrayRef<
6745+
SmallVector<std::pair<LoadInst *, int>>>
6746+
GatheredLoads) {
6747+
SmallVector<LoadInst *> NonVectorized;
6748+
for (ArrayRef<std::pair<LoadInst *, int>> LoadsDists : GatheredLoads) {
6749+
SmallVector<std::pair<LoadInst *, int>> LocalLoadsDists(LoadsDists);
6750+
SmallVector<LoadInst *> OriginalLoads(LocalLoadsDists.size());
6751+
transform(LoadsDists, OriginalLoads.begin(),
6752+
[](const std::pair<LoadInst *, int> &L) { return L.first; });
6753+
stable_sort(LocalLoadsDists, LoadSorter);
6754+
SmallVector<LoadInst *> Loads;
6755+
for (const std::pair<LoadInst *, int> &L : LocalLoadsDists) {
6756+
if (!getTreeEntry(L.first))
6757+
Loads.push_back(L.first);
6758+
}
6759+
if (Loads.empty())
6760+
continue;
6761+
BoUpSLP::ValueSet VectorizedLoads;
6762+
SmallVector<LoadInst *> SortedNonVectorized;
6763+
SmallVector<std::pair<ArrayRef<Value *>, LoadsState>> Results =
6764+
GetVectorizedRanges(Loads, VectorizedLoads, SortedNonVectorized);
6765+
if (!Results.empty() && !SortedNonVectorized.empty() &&
6766+
all_of(Results,
6767+
[](const std::pair<ArrayRef<Value *>, LoadsState> &P) {
6768+
return P.second == LoadsState::ScatterVectorize;
6769+
})) {
6770+
VectorizedLoads.clear();
6771+
SmallVector<LoadInst *> UnsortedNonVectorized;
6772+
SmallVector<std::pair<ArrayRef<Value *>, LoadsState>> UnsortedResults =
6773+
GetVectorizedRanges(OriginalLoads, VectorizedLoads,
6774+
UnsortedNonVectorized);
6775+
if (SortedNonVectorized.size() >= UnsortedNonVectorized.size()) {
6776+
SortedNonVectorized.swap(UnsortedNonVectorized);
6777+
Results.swap(UnsortedResults);
6778+
}
6779+
}
6780+
for (auto [Slice, _] : Results) {
6781+
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize gathered loads ("
6782+
<< Slice.size() << ")\n");
6783+
if (any_of(Slice, [&](Value *V) { return getTreeEntry(V); })) {
6784+
for (Value *L : Slice)
6785+
if (!getTreeEntry(L))
6786+
SortedNonVectorized.push_back(cast<LoadInst>(L));
6787+
continue;
6788+
}
67876789

6788-
// Select maximum VF as a maximum of user gathered nodes and
6789-
// distance between scalar loads in these nodes.
6790-
unsigned MaxVF = Slice.size();
6791-
unsigned UserMaxVF = 0;
6792-
std::optional<unsigned> SegmentedLoadsDistance = 0;
6793-
std::optional<unsigned> CommonVF = 0;
6794-
unsigned Order = 0;
6795-
DenseMap<const TreeEntry *, unsigned> EntryToPosition;
6796-
DenseSet<const TreeEntry *> DeinterleavedNodes;
6797-
for (auto [Idx, V] : enumerate(Slice)) {
6798-
for (const TreeEntry *E : ValueToGatherNodes.at(V)) {
6799-
UserMaxVF = std::max<unsigned>(UserMaxVF, E->Scalars.size());
6800-
unsigned Pos =
6801-
EntryToPosition.try_emplace(E, Idx).first->second;
6802-
UserMaxVF = std::max<unsigned>(UserMaxVF, Idx - Pos + 1);
6803-
if (CommonVF) {
6804-
if (*CommonVF == 0) {
6805-
CommonVF = E->Scalars.size();
6806-
continue;
6807-
}
6808-
if (*CommonVF != E->Scalars.size())
6809-
CommonVF.reset();
6810-
}
6811-
if (Pos != Idx && SegmentedLoadsDistance) {
6812-
DeinterleavedNodes.insert(E);
6813-
if (*SegmentedLoadsDistance == 0) {
6814-
SegmentedLoadsDistance = Idx - Pos;
6815-
continue;
6816-
}
6817-
if ((Idx - Pos) % *SegmentedLoadsDistance != 0 ||
6818-
(Idx - Pos) / *SegmentedLoadsDistance < Order) {
6819-
SegmentedLoadsDistance.reset();
6820-
DeinterleavedNodes.clear();
6821-
}
6822-
Order = (Idx - Pos) / SegmentedLoadsDistance.value_or(1);
6823-
}
6790+
// Select maximum VF as a maximum of user gathered nodes and
6791+
// distance between scalar loads in these nodes.
6792+
unsigned MaxVF = Slice.size();
6793+
unsigned UserMaxVF = 0;
6794+
std::optional<unsigned> SegmentedLoadsDistance = 0;
6795+
std::optional<unsigned> CommonVF = 0;
6796+
unsigned Order = 0;
6797+
DenseMap<const TreeEntry *, unsigned> EntryToPosition;
6798+
DenseSet<const TreeEntry *> DeinterleavedNodes;
6799+
for (auto [Idx, V] : enumerate(Slice)) {
6800+
for (const TreeEntry *E : ValueToGatherNodes.at(V)) {
6801+
UserMaxVF = std::max<unsigned>(UserMaxVF, E->Scalars.size());
6802+
unsigned Pos = EntryToPosition.try_emplace(E, Idx).first->second;
6803+
UserMaxVF = std::max<unsigned>(UserMaxVF, Idx - Pos + 1);
6804+
if (CommonVF) {
6805+
if (*CommonVF == 0) {
6806+
CommonVF = E->Scalars.size();
6807+
continue;
68246808
}
6809+
if (*CommonVF != E->Scalars.size())
6810+
CommonVF.reset();
68256811
}
6826-
unsigned Limit = 2;
6827-
unsigned InterleaveFactor = 0;
6828-
// Check if the large load represents segmented load operation.
6829-
if (SegmentedLoadsDistance.value_or(0) > 1 &&
6830-
CommonVF.value_or(0) != 0) {
6831-
InterleaveFactor = PowerOf2Ceil(*SegmentedLoadsDistance);
6832-
unsigned VF = *CommonVF;
6833-
SmallVector<unsigned> Order;
6834-
SmallVector<Value *> PointerOps;
6835-
// Segmented load detected - vectorize at maximum vector factor.
6836-
if (TTI->isLegalInterleavedAccessType(
6837-
getWidenedType(Slice.front()->getType(), VF),
6838-
InterleaveFactor,
6839-
cast<LoadInst>(Slice.front())->getAlign(),
6840-
cast<LoadInst>(Slice.front())
6841-
->getPointerAddressSpace()) &&
6842-
canVectorizeLoads(Slice, Slice.front(), Order, PointerOps) ==
6843-
LoadsState::Vectorize) {
6844-
UserMaxVF = InterleaveFactor * VF;
6845-
Limit = UserMaxVF;
6846-
} else {
6847-
UserMaxVF = VF;
6812+
if (Pos != Idx && SegmentedLoadsDistance) {
6813+
DeinterleavedNodes.insert(E);
6814+
if (*SegmentedLoadsDistance == 0) {
6815+
SegmentedLoadsDistance = Idx - Pos;
6816+
continue;
6817+
}
6818+
if ((Idx - Pos) % *SegmentedLoadsDistance != 0 ||
6819+
(Idx - Pos) / *SegmentedLoadsDistance < Order) {
6820+
SegmentedLoadsDistance.reset();
68486821
DeinterleavedNodes.clear();
68496822
}
6850-
} else {
6851-
DeinterleavedNodes.clear();
6823+
Order = (Idx - Pos) / SegmentedLoadsDistance.value_or(1);
68526824
}
6853-
// Cannot represent the loads as consecutive vectorizable nodes -
6854-
// just exit.
6855-
unsigned ConsecutiveNodesSize = 0;
6856-
if (!LoadEntriesToVectorize.empty() &&
6857-
(SegmentedLoadsDistance.value_or(0) == 0 ||
6858-
CommonVF.value_or(UserMaxVF) == UserMaxVF) &&
6859-
any_of(zip(LoadEntriesToVectorize, LoadSetsToVectorize),
6860-
[&, Slice = Slice](const auto &P) {
6861-
const auto *It = find_if(Slice, [&](Value *V) {
6862-
return std::get<1>(P).contains(V);
6863-
});
6864-
if (It == Slice.end())
6865-
return false;
6866-
ArrayRef<Value *> VL = std::get<0>(P);
6867-
ConsecutiveNodesSize += VL.size();
6868-
unsigned Start = std::distance(Slice.begin(), It);
6869-
unsigned Sz = Slice.size() - Start;
6870-
return Sz < VL.size() ||
6871-
Slice.slice(std::distance(Slice.begin(), It),
6872-
VL.size()) != VL;
6825+
}
6826+
}
6827+
unsigned Limit = 2;
6828+
unsigned InterleaveFactor = 0;
6829+
// Check if the large load represents segmented load operation.
6830+
if (SegmentedLoadsDistance.value_or(0) > 1 &&
6831+
CommonVF.value_or(0) != 0) {
6832+
InterleaveFactor = PowerOf2Ceil(*SegmentedLoadsDistance);
6833+
unsigned VF = *CommonVF;
6834+
SmallVector<unsigned> Order;
6835+
SmallVector<Value *> PointerOps;
6836+
// Segmented load detected - vectorize at maximum vector factor.
6837+
if (TTI->isLegalInterleavedAccessType(
6838+
getWidenedType(Slice.front()->getType(), VF),
6839+
InterleaveFactor, cast<LoadInst>(Slice.front())->getAlign(),
6840+
cast<LoadInst>(Slice.front())->getPointerAddressSpace()) &&
6841+
canVectorizeLoads(Slice, Slice.front(), Order, PointerOps) ==
6842+
LoadsState::Vectorize) {
6843+
UserMaxVF = InterleaveFactor * VF;
6844+
Limit = UserMaxVF;
6845+
} else {
6846+
UserMaxVF = VF;
6847+
DeinterleavedNodes.clear();
6848+
}
6849+
} else {
6850+
DeinterleavedNodes.clear();
6851+
}
6852+
// Cannot represent the loads as consecutive vectorizable nodes -
6853+
// just exit.
6854+
unsigned ConsecutiveNodesSize = 0;
6855+
if (!LoadEntriesToVectorize.empty() &&
6856+
(SegmentedLoadsDistance.value_or(0) == 0 ||
6857+
CommonVF.value_or(UserMaxVF) == UserMaxVF) &&
6858+
any_of(zip(LoadEntriesToVectorize, LoadSetsToVectorize),
6859+
[&, Slice = Slice](const auto &P) {
6860+
const auto *It = find_if(Slice, [&](Value *V) {
6861+
return std::get<1>(P).contains(V);
6862+
});
6863+
if (It == Slice.end())
6864+
return false;
6865+
ArrayRef<Value *> VL = std::get<0>(P);
6866+
ConsecutiveNodesSize += VL.size();
6867+
unsigned Start = std::distance(Slice.begin(), It);
6868+
unsigned Sz = Slice.size() - Start;
6869+
return Sz < VL.size() ||
6870+
Slice.slice(std::distance(Slice.begin(), It),
6871+
VL.size()) != VL;
6872+
}))
6873+
continue;
6874+
if (Slice.size() != ConsecutiveNodesSize)
6875+
MaxVF = std::min<unsigned>(MaxVF, PowerOf2Ceil(UserMaxVF));
6876+
for (unsigned VF = MaxVF; VF >= Limit; VF /= 2) {
6877+
bool IsVectorized = true;
6878+
for (unsigned I = 0, E = Slice.size(); I < E; I += VF) {
6879+
ArrayRef<Value *> SubSlice = Slice.slice(I, VF);
6880+
if (getTreeEntry(SubSlice.front()))
6881+
continue;
6882+
// Check if the subslice is to be-vectorized entry, which is not
6883+
// equal to entry.
6884+
if (any_of(zip(LoadEntriesToVectorize, LoadSetsToVectorize),
6885+
[&](const auto &P) {
6886+
return !SubSlice.equals(std::get<0>(P)) &&
6887+
set_is_subset(SubSlice, std::get<1>(P));
68736888
}))
68746889
continue;
6875-
if (Slice.size() != ConsecutiveNodesSize)
6876-
MaxVF = std::min<unsigned>(MaxVF, PowerOf2Ceil(UserMaxVF));
6877-
for (unsigned VF = MaxVF; VF >= Limit; VF /= 2) {
6878-
bool IsVectorized = true;
6879-
for (unsigned I = 0, E = Slice.size(); I < E; I += VF) {
6880-
ArrayRef<Value *> SubSlice = Slice.slice(I, VF);
6881-
if (getTreeEntry(SubSlice.front()))
6882-
continue;
6883-
// Check if the subslice is to be-vectorized entry, which is not
6884-
// equal to entry.
6885-
if (any_of(zip(LoadEntriesToVectorize, LoadSetsToVectorize),
6886-
[&](const auto &P) {
6887-
return !SubSlice.equals(std::get<0>(P)) &&
6888-
set_is_subset(SubSlice, std::get<1>(P));
6889-
}))
6890-
continue;
6891-
unsigned Sz = VectorizableTree.size();
6892-
buildTree_rec(SubSlice, 0, EdgeInfo(), InterleaveFactor,
6893-
DeinterleavedNodes);
6894-
if (Sz + 1 == VectorizableTree.size() &&
6895-
VectorizableTree.back()->isGather()) {
6896-
VectorizableTree.pop_back();
6897-
IsVectorized = false;
6898-
continue;
6899-
}
6900-
}
6901-
if (IsVectorized)
6902-
break;
6890+
unsigned Sz = VectorizableTree.size();
6891+
buildTree_rec(SubSlice, 0, EdgeInfo(), InterleaveFactor,
6892+
DeinterleavedNodes);
6893+
if (Sz + 1 == VectorizableTree.size() &&
6894+
VectorizableTree.back()->isGather()) {
6895+
VectorizableTree.pop_back();
6896+
IsVectorized = false;
6897+
continue;
69036898
}
69046899
}
6905-
NonVectorized.append(SortedNonVectorized);
6900+
if (IsVectorized)
6901+
break;
69066902
}
6907-
return NonVectorized;
6908-
};
6903+
}
6904+
NonVectorized.append(SortedNonVectorized);
6905+
}
6906+
return NonVectorized;
6907+
};
69096908
SmallVector<LoadInst *> NonVectorized = ProcessGatheredLoads(GatheredLoads);
69106909
SmallVector<SmallVector<std::pair<LoadInst *, int>>> FinalGatheredLoads;
69116910
for (LoadInst *LI : NonVectorized) {

0 commit comments

Comments
 (0)