Skip to content

Commit 9a16e4e

Browse files
committed
[Transform][LoadStoreVectorizer] fix merging result of one-element vector load
1 parent ccd258f commit 9a16e4e

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -627,8 +627,8 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
627627
Ret.push_back({C.front()});
628628

629629
unsigned ElemBytes = DL.getTypeStoreSize(getChainElemTy(C));
630-
APInt PrevReadEnd = C[0].OffsetFromLeader +
631-
DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8;
630+
APInt PrevReadEnd = C[0].OffsetFromLeader +
631+
DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8;
632632
for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
633633
// `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
634634
auto &CurChain = Ret.back();
@@ -882,8 +882,7 @@ bool Vectorizer::vectorizeChain(Chain &C) {
882882
Type *VecElemTy = getChainElemTy(C);
883883
bool IsLoadChain = isa<LoadInst>(C[0].Inst);
884884
unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
885-
int BytesAdded =
886-
DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8;
885+
int BytesAdded = DL.getTypeSizeInBits(getLoadStoreType(&*C[0].Inst)) / 8;
887886
APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
888887
int ChainBytes = BytesAdded;
889888
for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
@@ -899,8 +898,8 @@ bool Vectorizer::vectorizeChain(Chain &C) {
899898
assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0);
900899
// VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
901900
// than 1 byte (e.g. VecTy == <32 x i1>).
902-
Type *VecTy = FixedVectorType::get(
903-
VecElemTy, 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy));
901+
unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy);
902+
Type *VecTy = FixedVectorType::get(VecElemTy, NumElem);
904903

905904
Align Alignment = getLoadStoreAlignment(C[0].Inst);
906905
// If this is a load/store of an alloca, we might have upgraded the alloca's
@@ -927,7 +926,10 @@ bool Vectorizer::vectorizeChain(Chain &C) {
927926
llvm::min_element(C, [](const auto &A, const auto &B) {
928927
return A.Inst->comesBefore(B.Inst);
929928
})->Inst);
930-
929+
// This can happen due to a chain of redundant loads.
930+
// In this case, just use the element-type, and avoid ExtractElement.
931+
if (NumElem == 1)
932+
VecTy = VecElemTy;
931933
// Chain is in offset order, so C[0] is the instr with the lowest offset,
932934
// i.e. the root of the vector.
933935
VecInst = Builder.CreateAlignedLoad(VecTy,
@@ -944,9 +946,11 @@ bool Vectorizer::vectorizeChain(Chain &C) {
944946
auto Mask = llvm::to_vector<8>(
945947
llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
946948
V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
947-
} else {
949+
} else if (VecTy != VecElemTy) {
948950
V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
949951
I->getName());
952+
} else {
953+
V = VecInst;
950954
}
951955
if (V->getType() != I->getType())
952956
V = Builder.CreateBitOrPointerCast(V, I->getType());

0 commit comments

Comments
 (0)