@@ -157,6 +157,7 @@ using EqClassKey =
// One element of a chain: a load/store instruction paired with its offset
// from the chain's leader.
157157struct ChainElem {
// The memory-access instruction this element stands for.
158158 Instruction *Inst;
// Offset of this access from the leader; used as a byte count (it is added to
// SzBits / 8 in splitChainByContiguity).
159159 APInt OffsetFromLeader;
// Assigned in splitChainByContiguity: true when this access ends at or before
// the furthest end offset of the elements visited before it, i.e. it touches
// no new bytes. vectorizeChain skips such elements when summing ChainBytes.
// NOTE(review): the flag is computed for every chain element, not only for
// loads -- confirm that store chains are meant to be covered as well.
160+ bool Redundant = false ; // Defaults to "not redundant".
160161 ChainElem (Instruction *Inst, APInt OffsetFromLeader)
161162 : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
162163};
@@ -626,26 +627,33 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
626627 std::vector<Chain> Ret;
627628 Ret.push_back ({C.front ()});
628629
630+ APInt PrevReadEnd = C[0 ].OffsetFromLeader +
631+ DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
629632 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
630633 // PrevReadEnd is the largest end offset (exclusive) of the elements visited so far.
631634 auto &CurChain = Ret.back ();
632- const ChainElem &Prev = CurChain.back ();
633- unsigned SzBits = DL.getTypeSizeInBits (getLoadStoreType (&*Prev.Inst ));
635+ unsigned SzBits = DL.getTypeSizeInBits (getLoadStoreType (&*It->Inst ));
634636 assert (SzBits % 8 == 0 && " Non-byte sizes should have been filtered out by "
635637 " collectEquivalenceClass" );
636- APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8 ;
637638
638639 // Add this instruction to the end of the current chain, or start a new one.
640+ APInt ReadEnd = It->OffsetFromLeader + SzBits / 8 ;
641+ bool IsRedundant = ReadEnd.sle (PrevReadEnd);
639642 bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
640- LLVM_DEBUG (dbgs () << " LSV: Instructions are "
641- << (AreContiguous ? " " : " not " ) << " contiguous: "
642- << *Prev.Inst << " (ends at offset " << PrevReadEnd
643- << " ) -> " << *It->Inst << " (starts at offset "
643+
644+ LLVM_DEBUG (dbgs () << " LSV: Instruction is "
645+ << (AreContiguous
646+ ? " contiguous"
647+ : ((IsRedundant ? " redundant" : " chain-breaker" )))
648+ << *It->Inst << " (starts at offset "
644649 << It->OffsetFromLeader << " )\n " );
645- if (AreContiguous)
650+
651+ It->Redundant = IsRedundant;
652+ if (AreContiguous || IsRedundant)
646653 CurChain.push_back (*It);
647654 else
648655 Ret.push_back ({*It});
656+ PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
649657 }
650658
651659 // Filter out length-1 chains, these are uninteresting.
@@ -874,10 +882,12 @@ bool Vectorizer::vectorizeChain(Chain &C) {
874882 Type *VecElemTy = getChainElemTy (C);
875883 bool IsLoadChain = isa<LoadInst>(C[0 ].Inst );
876884 unsigned AS = getLoadStoreAddressSpace (C[0 ].Inst );
877- unsigned ChainBytes = std::accumulate (
878- C.begin (), C.end (), 0u , [&](unsigned Bytes, const ChainElem &E) {
879- return Bytes + DL.getTypeStoreSize (getLoadStoreType (E.Inst ));
880- });
885+ unsigned ChainBytes = 0 ;
886+ for (auto &E : C) {
887+ if (E.Redundant )
888+ continue ;
889+ ChainBytes += DL.getTypeStoreSize (getLoadStoreType (E.Inst ));
890+ }
881891 assert (ChainBytes % DL.getTypeStoreSize (VecElemTy) == 0 );
882892 // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
883893 // than 1 byte (e.g. VecTy == <32 x i1>).
@@ -916,20 +926,19 @@ bool Vectorizer::vectorizeChain(Chain &C) {
916926 getLoadStorePointerOperand (C[0 ].Inst ),
917927 Alignment);
918928
919- unsigned VecIdx = 0 ;
920929 for (const ChainElem &E : C) {
921930 Instruction *I = E.Inst ;
922931 Value *V;
923932 Type *T = getLoadStoreType (I);
933+ int EOffset = (E.OffsetFromLeader - C[0 ].OffsetFromLeader ).getSExtValue ();
934+ int VecIdx = 8 * EOffset / DL.getTypeSizeInBits (VecElemTy);
924935 if (auto *VT = dyn_cast<FixedVectorType>(T)) {
925936 auto Mask = llvm::to_vector<8 >(
926937 llvm::seq<int >(VecIdx, VecIdx + VT->getNumElements ()));
927938 V = Builder.CreateShuffleVector (VecInst, Mask, I->getName ());
928- VecIdx += VT->getNumElements ();
929939 } else {
930940 V = Builder.CreateExtractElement (VecInst, Builder.getInt32 (VecIdx),
931941 I->getName ());
932- ++VecIdx;
933942 }
934943 if (V->getType () != I->getType ())
935944 V = Builder.CreateBitOrPointerCast (V, I->getType ());
@@ -964,22 +973,24 @@ bool Vectorizer::vectorizeChain(Chain &C) {
964973
965974 // Build the vector to store.
966975 Value *Vec = PoisonValue::get (VecTy);
967- unsigned VecIdx = 0 ;
968- auto InsertElem = [&](Value *V) {
976+ auto InsertElem = [&](Value *V, unsigned VecIdx) {
969977 if (V->getType () != VecElemTy)
970978 V = Builder.CreateBitOrPointerCast (V, VecElemTy);
971- Vec = Builder.CreateInsertElement (Vec, V, Builder.getInt32 (VecIdx++ ));
979+ Vec = Builder.CreateInsertElement (Vec, V, Builder.getInt32 (VecIdx));
972980 };
973981 for (const ChainElem &E : C) {
974982 auto *I = cast<StoreInst>(E.Inst );
983+ int EOffset = (E.OffsetFromLeader - C[0 ].OffsetFromLeader ).getSExtValue ();
984+ int VecIdx = 8 * EOffset / DL.getTypeSizeInBits (VecElemTy);
975985 if (FixedVectorType *VT =
976986 dyn_cast<FixedVectorType>(getLoadStoreType (I))) {
977987 for (int J = 0 , JE = VT->getNumElements (); J < JE; ++J) {
978988 InsertElem (Builder.CreateExtractElement (I->getValueOperand (),
979- Builder.getInt32 (J)));
989+ Builder.getInt32 (J)),
990+ VecIdx++);
980991 }
981992 } else {
982- InsertElem (I->getValueOperand ());
993+ InsertElem (I->getValueOperand (), VecIdx );
983994 }
984995 }
985996
0 commit comments