@@ -157,11 +157,8 @@ using EqClassKey =
157157struct ChainElem {
158158 Instruction *Inst;
159159 APInt OffsetFromLeader;
160- unsigned IncrementalBytes = 0 ;
161- ChainElem (Instruction *Inst, APInt OffsetFromLeader,
162- unsigned IncrementalBytes)
163- : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)),
164- IncrementalBytes (std::move(IncrementalBytes)) {}
160+ ChainElem (Instruction *Inst, APInt OffsetFromLeader)
161+ : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
165162};
166163using Chain = SmallVector<ChainElem, 1 >;
167164
@@ -630,9 +627,8 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
630627 Ret.push_back ({C.front ()});
631628
632629 unsigned ElemBytes = DL.getTypeStoreSize (getChainElemTy (C));
633- assert (C[0 ].IncrementalBytes ==
634- DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 );
635- APInt PrevReadEnd = C[0 ].OffsetFromLeader + C[0 ].IncrementalBytes ;
630+ APInt PrevReadEnd = C[0 ].OffsetFromLeader +
631+ DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
636632 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
637633 // `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
638634 auto &CurChain = Ret.back ();
@@ -642,23 +638,21 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
642638
643639 // Add this instruction to the end of the current chain, or start a new one.
644640 APInt ReadEnd = It->OffsetFromLeader + SzBits / 8 ;
641+ // Allow redundancy: partial or full overlapping counts as contiguous.
645642 int ExtraBytes =
646643 PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
647- bool AreContiguous =
648- It-> OffsetFromLeader . sle (PrevReadEnd) && ExtraBytes % ElemBytes == 0 ;
644+ bool AreContiguous = It-> OffsetFromLeader . sle (PrevReadEnd) &&
645+ SzBits % ElemBytes == 0 && ExtraBytes % ElemBytes == 0 ;
649646
650647 LLVM_DEBUG (dbgs () << " LSV: Instruction is "
651648 << (AreContiguous ? " contiguous" : " chain-breaker" )
652649 << *It->Inst << " (starts at offset "
653650 << It->OffsetFromLeader << " )\n " );
654651
655- if (AreContiguous) {
656- It->IncrementalBytes = ExtraBytes;
652+ if (AreContiguous)
657653 CurChain.push_back (*It);
658- } else {
659- assert (It->IncrementalBytes == SzBits / 8 );
654+ else
660655 Ret.push_back ({*It});
661- }
662656 PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
663657 }
664658
@@ -888,9 +882,19 @@ bool Vectorizer::vectorizeChain(Chain &C) {
888882 Type *VecElemTy = getChainElemTy (C);
889883 bool IsLoadChain = isa<LoadInst>(C[0 ].Inst );
890884 unsigned AS = getLoadStoreAddressSpace (C[0 ].Inst );
891- unsigned ChainBytes = 0 ;
892- for (auto &E : C)
893- ChainBytes += E.IncrementalBytes ;
885+ int BytesAdded =
886+ DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
887+ APInt PrevReadEnd = C[0 ].OffsetFromLeader + BytesAdded;
888+ int ChainBytes = BytesAdded;
889+ for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
890+ unsigned SzBits = DL.getTypeSizeInBits (getLoadStoreType (&*It->Inst ));
891+ APInt ReadEnd = It->OffsetFromLeader + SzBits / 8 ;
892+ // Update ChainBytes considering possible overlap.
893+ BytesAdded =
894+ PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
895+ ChainBytes += BytesAdded;
896+ PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
897+ }
894898
895899 assert (ChainBytes % DL.getTypeStoreSize (VecElemTy) == 0 );
896900 // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
@@ -1583,8 +1587,7 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15831587 (ChainIter->first ->comesBefore (I) ? I : ChainIter->first ))) {
15841588 // `Offset` might not have the expected number of bits, if e.g. AS has a
15851589 // different number of bits than opaque pointers.
1586- ChainIter->second .emplace_back (
1587- I, Offset.value (), DL.getTypeSizeInBits (getLoadStoreType (I)) / 8 );
1590+ ChainIter->second .emplace_back (I, Offset.value ());
15881591 // Move ChainIter to the front of the MRU list.
15891592 MRU.remove (*ChainIter);
15901593 MRU.push_front (*ChainIter);
@@ -1596,8 +1599,7 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15961599 if (!MatchFound) {
15971600 APInt ZeroOffset (ASPtrBits, 0 );
15981601 InstrListElem *E = new (Allocator.Allocate ()) InstrListElem (I);
1599- E->second .emplace_back (I, ZeroOffset,
1600- DL.getTypeSizeInBits (getLoadStoreType (I)) / 8 );
1602+ E->second .emplace_back (I, ZeroOffset);
16011603 MRU.push_front (*E);
16021604 Chains.insert (E);
16031605 }
0 commit comments