@@ -157,9 +157,11 @@ using EqClassKey =
157157struct ChainElem { // One load/store access in a chain, with its offset and byte contribution.
158158 Instruction *Inst; // The load or store instruction this element refers to.
159159 APInt OffsetFromLeader; // Byte offset from the chain's leader; the leader itself is inserted with offset 0.
160- bool Redundant = false ; // Set to true when load is redundant.
161- ChainElem (Instruction *Inst, APInt OffsetFromLeader)
162- : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
160+ unsigned IncrementalBytes = 0 ; // Bytes this access adds past the end of the earlier accesses in its chain; vectorizeChain sums these into ChainBytes.
161+ ChainElem (Instruction *Inst, APInt OffsetFromLeader,
162+ unsigned IncrementalBytes) // gatherChains passes the access size in bytes; splitChainByContiguity may later overwrite it.
163+ : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)),
164+ IncrementalBytes (std::move(IncrementalBytes)) {}
163165};
164166using Chain = SmallVector<ChainElem, 1 >; // A chain is a sequence of ChainElems (SmallVector with inline room for one element).
165167
@@ -627,8 +629,10 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
627629 std::vector<Chain> Ret;
628630 Ret.push_back ({C.front ()});
629631
630- APInt PrevReadEnd = C[0 ].OffsetFromLeader +
631- DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 ;
632+ unsigned ElemBytes = DL.getTypeStoreSize (getChainElemTy (C));
633+ assert (C[0 ].IncrementalBytes ==
634+ DL.getTypeSizeInBits (getLoadStoreType (&*C[0 ].Inst )) / 8 );
635+ APInt PrevReadEnd = C[0 ].OffsetFromLeader + C[0 ].IncrementalBytes ;
632636 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
633637 // `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
634638 auto &CurChain = Ret.back ();
@@ -638,21 +642,23 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
638642
639643 // Add this instruction to the end of the current chain, or start a new one.
640644 APInt ReadEnd = It->OffsetFromLeader + SzBits / 8 ;
641- bool IsRedundant = ReadEnd.sle (PrevReadEnd);
642- bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
645+ int ExtraBytes =
646+ PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
647+ bool AreContiguous =
648+ It->OffsetFromLeader .sle (PrevReadEnd) && ExtraBytes % ElemBytes == 0 ;
643649
644650 LLVM_DEBUG (dbgs () << " LSV: Instruction is "
645- << (AreContiguous
646- ? " contiguous"
647- : ((IsRedundant ? " redundant" : " chain-breaker" )))
651+ << (AreContiguous ? " contiguous" : " chain-breaker" )
648652 << *It->Inst << " (starts at offset "
649653 << It->OffsetFromLeader << " )\n " );
650654
651- It-> Redundant = IsRedundant;
652- if (AreContiguous || IsRedundant)
655+ if (AreContiguous) {
656+ It-> IncrementalBytes = ExtraBytes;
653657 CurChain.push_back (*It);
654- else
658+ } else {
659+ assert (It->IncrementalBytes == SzBits / 8 );
655660 Ret.push_back ({*It});
661+ }
656662 PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
657663 }
658664
@@ -883,11 +889,9 @@ bool Vectorizer::vectorizeChain(Chain &C) {
883889 bool IsLoadChain = isa<LoadInst>(C[0 ].Inst );
884890 unsigned AS = getLoadStoreAddressSpace (C[0 ].Inst );
885891 unsigned ChainBytes = 0 ;
886- for (auto &E : C) {
887- if (E.Redundant )
888- continue ;
889- ChainBytes += DL.getTypeStoreSize (getLoadStoreType (E.Inst ));
890- }
892+ for (auto &E : C)
893+ ChainBytes += E.IncrementalBytes ;
894+
891895 assert (ChainBytes % DL.getTypeStoreSize (VecElemTy) == 0 );
892896 // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
893897 // than 1 byte (e.g. VecTy == <32 x i1>).
@@ -1579,7 +1583,8 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15791583 (ChainIter->first ->comesBefore (I) ? I : ChainIter->first ))) {
15801584 // `Offset` might not have the expected number of bits, if e.g. AS has a
15811585 // different number of bits than opaque pointers.
1582- ChainIter->second .emplace_back (I, Offset.value ());
1586+ ChainIter->second .emplace_back (
1587+ I, Offset.value (), DL.getTypeSizeInBits (getLoadStoreType (I)) / 8 );
15831588 // Move ChainIter to the front of the MRU list.
15841589 MRU.remove (*ChainIter);
15851590 MRU.push_front (*ChainIter);
@@ -1591,7 +1596,8 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
15911596 if (!MatchFound) {
15921597 APInt ZeroOffset (ASPtrBits, 0 );
15931598 InstrListElem *E = new (Allocator.Allocate ()) InstrListElem (I);
1594- E->second .emplace_back (I, ZeroOffset);
1599+ E->second .emplace_back (I, ZeroOffset,
1600+ DL.getTypeSizeInBits (getLoadStoreType (I)) / 8 );
15951601 MRU.push_front (*E);
15961602 Chains.insert (E);
15971603 }
0 commit comments