@@ -627,8 +627,8 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
627627 Ret.push_back ({C.front ()});
628628
629629 unsigned ElemBytes = DL.getTypeStoreSize (getChainElemTy (C));
630- APInt PrevReadEnd = C[0 ].OffsetFromLeader +
631- DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
630+ APInt PrevReadEnd = C[0 ].OffsetFromLeader +
631+ DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
632632 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
633633 // `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
634634 auto &CurChain = Ret.back ();
@@ -882,8 +882,7 @@ bool Vectorizer::vectorizeChain(Chain &C) {
882882 Type *VecElemTy = getChainElemTy (C);
883883 bool IsLoadChain = isa<LoadInst>(C[0 ].Inst );
884884 unsigned AS = getLoadStoreAddressSpace (C[0 ].Inst );
885- int BytesAdded =
886- DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
885+ int BytesAdded = DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
887886 APInt PrevReadEnd = C[0 ].OffsetFromLeader + BytesAdded;
888887 int ChainBytes = BytesAdded;
889888 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
@@ -899,8 +898,8 @@ bool Vectorizer::vectorizeChain(Chain &C) {
899898 assert (ChainBytes % DL.getTypeStoreSize (VecElemTy) == 0 );
900899 // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
901900 // than 1 byte (e.g. VecTy == <32 x i1>).
902- Type *VecTy = FixedVectorType::get (
903- VecElemTy, 8 * ChainBytes / DL. getTypeSizeInBits (VecElemTy) );
901+ unsigned NumElem = 8 * ChainBytes / DL. getTypeSizeInBits (VecElemTy);
902+ Type *VecTy = FixedVectorType::get (VecElemTy, NumElem );
904903
905904 Align Alignment = getLoadStoreAlignment (C[0 ].Inst );
906905 // If this is a load/store of an alloca, we might have upgraded the alloca's
@@ -927,7 +926,10 @@ bool Vectorizer::vectorizeChain(Chain &C) {
927926 llvm::min_element (C, [](const auto &A, const auto &B) {
928927 return A.Inst ->comesBefore (B.Inst );
929928 })->Inst );
930-
929+ // NumElem == 1 can happen due to a chain of redundant loads.
930+ // In this case, use the element type itself as VecTy and avoid ExtractElement.
931+ if (NumElem == 1 )
932+ VecTy = VecElemTy;
931933 // Chain is in offset order, so C[0] is the instr with the lowest offset,
932934 // i.e. the root of the vector.
933935 VecInst = Builder.CreateAlignedLoad (VecTy,
@@ -944,9 +946,11 @@ bool Vectorizer::vectorizeChain(Chain &C) {
944946 auto Mask = llvm::to_vector<8 >(
945947 llvm::seq<int >(VecIdx, VecIdx + VT->getNumElements ()));
946948 V = Builder.CreateShuffleVector (VecInst, Mask, I->getName ());
947- } else {
949+ } else if (VecTy != VecElemTy) {
948950 V = Builder.CreateExtractElement (VecInst, Builder.getInt32 (VecIdx),
949951 I->getName ());
952+ } else {
953+ V = VecInst;
950954 }
951955 if (V->getType () != I->getType ())
952956 V = Builder.CreateBitOrPointerCast (V, I->getType ());
0 commit comments