Skip to content

Commit ccd258f

Browse files
committed
[Transform][LoadStoreVectorizer] move redundancy calculation to vectorizeChain
1 parent 44fc1ee commit ccd258f

File tree

1 file changed

+24
-22
lines changed

1 file changed

+24
-22
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,8 @@ using EqClassKey =
157157
struct ChainElem {
158158
Instruction *Inst;
159159
APInt OffsetFromLeader;
160-
unsigned IncrementalBytes = 0;
161-
ChainElem(Instruction *Inst, APInt OffsetFromLeader,
162-
unsigned IncrementalBytes)
163-
: Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)),
164-
IncrementalBytes(std::move(IncrementalBytes)) {}
160+
ChainElem(Instruction *Inst, APInt OffsetFromLeader)
161+
: Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
165162
};
166163
using Chain = SmallVector<ChainElem, 1>;
167164

@@ -630,9 +627,8 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
630627
Ret.push_back({C.front()});
631628

632629
unsigned ElemBytes = DL.getTypeStoreSize(getChainElemTy(C));
633-
assert(C[0].IncrementalBytes ==
634-
DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8);
635-
APInt PrevReadEnd = C[0].OffsetFromLeader + C[0].IncrementalBytes;
630+
APInt PrevReadEnd = C[0].OffsetFromLeader +
631+
DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8;
636632
for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
637633
// `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
638634
auto &CurChain = Ret.back();
@@ -642,23 +638,21 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
642638

643639
// Add this instruction to the end of the current chain, or start a new one.
644640
APInt ReadEnd = It->OffsetFromLeader + SzBits / 8;
641+
// Allow redundancy: partial or full overlapping counts as contiguous.
645642
int ExtraBytes =
646643
PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
647-
bool AreContiguous =
648-
It->OffsetFromLeader.sle(PrevReadEnd) && ExtraBytes % ElemBytes == 0;
644+
bool AreContiguous = It->OffsetFromLeader.sle(PrevReadEnd) &&
645+
SzBits % ElemBytes == 0 && ExtraBytes % ElemBytes == 0;
649646

650647
LLVM_DEBUG(dbgs() << "LSV: Instruction is "
651648
<< (AreContiguous ? "contiguous" : "chain-breaker")
652649
<< *It->Inst << " (starts at offset "
653650
<< It->OffsetFromLeader << ")\n");
654651

655-
if (AreContiguous) {
656-
It->IncrementalBytes = ExtraBytes;
652+
if (AreContiguous)
657653
CurChain.push_back(*It);
658-
} else {
659-
assert(It->IncrementalBytes == SzBits / 8);
654+
else
660655
Ret.push_back({*It});
661-
}
662656
PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
663657
}
664658

@@ -888,9 +882,19 @@ bool Vectorizer::vectorizeChain(Chain &C) {
888882
Type *VecElemTy = getChainElemTy(C);
889883
bool IsLoadChain = isa<LoadInst>(C[0].Inst);
890884
unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
891-
unsigned ChainBytes = 0;
892-
for (auto &E : C)
893-
ChainBytes += E.IncrementalBytes;
885+
int BytesAdded =
886+
DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8;
887+
APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
888+
int ChainBytes = BytesAdded;
889+
for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
890+
unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*It->Inst));
891+
APInt ReadEnd = It->OffsetFromLeader + SzBits / 8;
892+
// Update ChainBytes considering possible overlap.
893+
BytesAdded =
894+
PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
895+
ChainBytes += BytesAdded;
896+
PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
897+
}
894898

895899
assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0);
896900
// VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
@@ -1583,8 +1587,7 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15831587
(ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
15841588
// `Offset` might not have the expected number of bits, if e.g. AS has a
15851589
// different number of bits than opaque pointers.
1586-
ChainIter->second.emplace_back(
1587-
I, Offset.value(), DL.getTypeSizeInBits(getLoadStoreType(I)) / 8);
1590+
ChainIter->second.emplace_back(I, Offset.value());
15881591
// Move ChainIter to the front of the MRU list.
15891592
MRU.remove(*ChainIter);
15901593
MRU.push_front(*ChainIter);
@@ -1596,8 +1599,7 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15961599
if (!MatchFound) {
15971600
APInt ZeroOffset(ASPtrBits, 0);
15981601
InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
1599-
E->second.emplace_back(I, ZeroOffset,
1600-
DL.getTypeSizeInBits(getLoadStoreType(I)) / 8);
1602+
E->second.emplace_back(I, ZeroOffset);
16011603
MRU.push_front(*E);
16021604
Chains.insert(E);
16031605
}

0 commit comments

Comments
 (0)