Skip to content

Commit 51a0c30

Browse files
committed
[Transform][LoadStoreVectorizer] Allow redundant loads in Chain
This lets a chain absorb redundant loads when forming a vector load. It can be used to fix the situation created by VectorCombine. See: https://discourse.llvm.org/t/what-is-the-purpose-of-vectorizeloadinsert-in-the-vectorcombine-pass/88532
1 parent 8785276 commit 51a0c30

File tree

1 file changed

+31
-20
lines changed

1 file changed

+31
-20
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ using EqClassKey =
157157
// One memory instruction of a chain together with its position in that chain.
struct ChainElem {
158158
// The instruction this element stands for; the chain code queries it with
// getLoadStoreType() and friends.
Instruction *Inst;
159159
// Offset of this access in bytes (it is added to type sizes expressed as
// bits / 8); per the field name it is measured from the chain's leader.
APInt OffsetFromLeader;
160+
// Set by splitChainByContiguity when this access ends at or before the
// furthest byte already covered by earlier elements. vectorizeChain skips
// such elements when it sums the chain's size in bytes.
// NOTE(review): the flag is assigned for every chain element, not only for
// loads -- confirm that absorbing overlapping stores is intended.
bool Redundant = false;
160161
// Builds an element with Redundant left at its default of false.
ChainElem(Instruction *Inst, APInt OffsetFromLeader)
161162
: Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
162163
};
@@ -626,26 +627,33 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
626627
std::vector<Chain> Ret;
627628
Ret.push_back({C.front()});
628629

630+
APInt PrevReadEnd = C[0].OffsetFromLeader +
631+
DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8;
629632
for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
630633
// The elements visited so far cover offsets up to (not including) PrevReadEnd.
631634
auto &CurChain = Ret.back();
632-
const ChainElem &Prev = CurChain.back();
633-
unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*Prev.Inst));
635+
unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*It->Inst));
634636
assert(SzBits % 8 == 0 && "Non-byte sizes should have been filtered out by "
635637
"collectEquivalenceClass");
636-
APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;
637638

638639
// Add this instruction to the end of the current chain, or start a new one.
640+
APInt ReadEnd = It->OffsetFromLeader + SzBits / 8;
641+
bool IsRedundant = ReadEnd.sle(PrevReadEnd);
639642
bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
640-
LLVM_DEBUG(dbgs() << "LSV: Instructions are "
641-
<< (AreContiguous ? "" : "not ") << "contiguous: "
642-
<< *Prev.Inst << " (ends at offset " << PrevReadEnd
643-
<< ") -> " << *It->Inst << " (starts at offset "
643+
644+
LLVM_DEBUG(dbgs() << "LSV: Instruction is "
645+
<< (AreContiguous
646+
? "contiguous"
647+
: ((IsRedundant ? "redundant" : "chain-breaker")))
648+
<< *It->Inst << " (starts at offset "
644649
<< It->OffsetFromLeader << ")\n");
645-
if (AreContiguous)
650+
651+
It->Redundant = IsRedundant;
652+
if (AreContiguous || IsRedundant)
646653
CurChain.push_back(*It);
647654
else
648655
Ret.push_back({*It});
656+
PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
649657
}
650658

651659
// Filter out length-1 chains, these are uninteresting.
@@ -874,10 +882,12 @@ bool Vectorizer::vectorizeChain(Chain &C) {
874882
Type *VecElemTy = getChainElemTy(C);
875883
bool IsLoadChain = isa<LoadInst>(C[0].Inst);
876884
unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
877-
unsigned ChainBytes = std::accumulate(
878-
C.begin(), C.end(), 0u, [&](unsigned Bytes, const ChainElem &E) {
879-
return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));
880-
});
885+
unsigned ChainBytes = 0;
886+
for (auto &E : C) {
887+
if (E.Redundant)
888+
continue;
889+
ChainBytes += DL.getTypeStoreSize(getLoadStoreType(E.Inst));
890+
}
881891
assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0);
882892
// VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
883893
// than 1 byte (e.g. VecTy == <32 x i1>).
@@ -916,20 +926,19 @@ bool Vectorizer::vectorizeChain(Chain &C) {
916926
getLoadStorePointerOperand(C[0].Inst),
917927
Alignment);
918928

919-
unsigned VecIdx = 0;
920929
for (const ChainElem &E : C) {
921930
Instruction *I = E.Inst;
922931
Value *V;
923932
Type *T = getLoadStoreType(I);
933+
int EOffset = (E.OffsetFromLeader - C[0].OffsetFromLeader).getSExtValue();
934+
int VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
924935
if (auto *VT = dyn_cast<FixedVectorType>(T)) {
925936
auto Mask = llvm::to_vector<8>(
926937
llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
927938
V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
928-
VecIdx += VT->getNumElements();
929939
} else {
930940
V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
931941
I->getName());
932-
++VecIdx;
933942
}
934943
if (V->getType() != I->getType())
935944
V = Builder.CreateBitOrPointerCast(V, I->getType());
@@ -964,22 +973,24 @@ bool Vectorizer::vectorizeChain(Chain &C) {
964973

965974
// Build the vector to store.
966975
Value *Vec = PoisonValue::get(VecTy);
967-
unsigned VecIdx = 0;
968-
auto InsertElem = [&](Value *V) {
976+
auto InsertElem = [&](Value *V, unsigned VecIdx) {
969977
if (V->getType() != VecElemTy)
970978
V = Builder.CreateBitOrPointerCast(V, VecElemTy);
971-
Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++));
979+
Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx));
972980
};
973981
for (const ChainElem &E : C) {
974982
auto *I = cast<StoreInst>(E.Inst);
983+
int EOffset = (E.OffsetFromLeader - C[0].OffsetFromLeader).getSExtValue();
984+
int VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
975985
if (FixedVectorType *VT =
976986
dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
977987
for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
978988
InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
979-
Builder.getInt32(J)));
989+
Builder.getInt32(J)),
990+
VecIdx++);
980991
}
981992
} else {
982-
InsertElem(I->getValueOperand());
993+
InsertElem(I->getValueOperand(), VecIdx);
983994
}
984995
}
985996

0 commit comments

Comments
 (0)