@@ -157,11 +157,8 @@ using EqClassKey =
157157struct ChainElem {
158158 Instruction *Inst;
159159 APInt OffsetFromLeader;
160- unsigned IncrementalBytes = 0 ;
161- ChainElem (Instruction *Inst, APInt OffsetFromLeader,
162- unsigned IncrementalBytes)
163- : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)),
164- IncrementalBytes (std::move(IncrementalBytes)) {}
160+ ChainElem (Instruction *Inst, APInt OffsetFromLeader)
161+ : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
165162};
166163using Chain = SmallVector<ChainElem, 1 >;
167164
@@ -629,10 +626,8 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
629626 std::vector<Chain> Ret;
630627 Ret.push_back ({C.front ()});
631628
632- unsigned ElemBytes = DL.getTypeStoreSize (getChainElemTy (C));
633- assert (C[0 ].IncrementalBytes ==
634- DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 );
635- APInt PrevReadEnd = C[0 ].OffsetFromLeader + C[0 ].IncrementalBytes ;
629+ APInt PrevReadEnd = C[0 ].OffsetFromLeader +
630+ DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
636631 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
637632 // `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
638633 auto &CurChain = Ret.back ();
@@ -642,23 +637,18 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
642637
643638 // Add this instruction to the end of the current chain, or start a new one.
644639 APInt ReadEnd = It->OffsetFromLeader + SzBits / 8 ;
645- int ExtraBytes =
646- PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
647- bool AreContiguous =
648- It->OffsetFromLeader .sle (PrevReadEnd) && ExtraBytes % ElemBytes == 0 ;
640+ // Allow redundancy: partial or full overlapping counts as contiguous.
641+ bool AreContiguous = It->OffsetFromLeader .sle (PrevReadEnd);
649642
650643 LLVM_DEBUG (dbgs () << " LSV: Instruction is "
651644 << (AreContiguous ? " contiguous" : " chain-breaker" )
652645 << *It->Inst << " (starts at offset "
653646 << It->OffsetFromLeader << " )\n " );
654647
655- if (AreContiguous) {
656- It->IncrementalBytes = ExtraBytes;
648+ if (AreContiguous)
657649 CurChain.push_back (*It);
658- } else {
659- assert (It->IncrementalBytes == SzBits / 8 );
650+ else
660651 Ret.push_back ({*It});
661- }
662652 PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
663653 }
664654
@@ -888,9 +878,19 @@ bool Vectorizer::vectorizeChain(Chain &C) {
888878 Type *VecElemTy = getChainElemTy (C);
889879 bool IsLoadChain = isa<LoadInst>(C[0 ].Inst );
890880 unsigned AS = getLoadStoreAddressSpace (C[0 ].Inst );
891- unsigned ChainBytes = 0 ;
892- for (auto &E : C)
893- ChainBytes += E.IncrementalBytes ;
881+ int BytesAdded =
882+ DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
883+ APInt PrevReadEnd = C[0 ].OffsetFromLeader + BytesAdded;
884+ int ChainBytes = BytesAdded;
885+ for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
886+ unsigned SzBits = DL.getTypeSizeInBits (getLoadStoreType (&*It->Inst ));
887+ APInt ReadEnd = It->OffsetFromLeader + SzBits / 8 ;
888+ // Update ChainBytes considering possible overlap.
889+ BytesAdded =
890+ PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
891+ ChainBytes += BytesAdded;
892+ PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
893+ }
894894
895895 assert (ChainBytes % DL.getTypeStoreSize (VecElemTy) == 0 );
896896 // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
@@ -1583,8 +1583,7 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15831583 (ChainIter->first ->comesBefore (I) ? I : ChainIter->first ))) {
15841584 // `Offset` might not have the expected number of bits, if e.g. AS has a
15851585 // different number of bits than opaque pointers.
1586- ChainIter->second .emplace_back (
1587- I, Offset.value (), DL.getTypeSizeInBits (getLoadStoreType (I)) / 8 );
1586+ ChainIter->second .emplace_back (I, Offset.value ());
15881587 // Move ChainIter to the front of the MRU list.
15891588 MRU.remove (*ChainIter);
15901589 MRU.push_front (*ChainIter);
@@ -1596,8 +1595,7 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15961595 if (!MatchFound) {
15971596 APInt ZeroOffset (ASPtrBits, 0 );
15981597 InstrListElem *E = new (Allocator.Allocate ()) InstrListElem (I);
1599- E->second .emplace_back (I, ZeroOffset,
1600- DL.getTypeSizeInBits (getLoadStoreType (I)) / 8 );
1598+ E->second .emplace_back (I, ZeroOffset);
16011599 MRU.push_front (*E);
16021600 Chains.insert (E);
16031601 }
0 commit comments