@@ -626,35 +626,26 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
626626 std::vector<Chain> Ret;
627627 Ret.push_back ({C.front ()});
628628
629- unsigned ElemBytes = DL.getTypeStoreSize (getChainElemTy (C));
630- APInt PrevReadEnd = C[0 ].OffsetFromLeader +
631- DL.getTypeStoreSize (getLoadStoreType (&*C[0 ].Inst ));
632629 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
633630 // `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
634631 auto &CurChain = Ret.back ();
635- unsigned SzBytes = DL.getTypeStoreSize (getLoadStoreType (&*It->Inst ));
632+ const ChainElem &Prev = CurChain.back ();
633+ unsigned SzBits = DL.getTypeSizeInBits (getLoadStoreType (&*Prev.Inst ));
634+ assert (SzBits % 8 == 0 && " Non-byte sizes should have been filtered out by "
635+ " collectEquivalenceClass" );
636+ APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8 ;
636637
637638 // Add this instruction to the end of the current chain, or start a new one.
638- assert (SzBytes % ElemBytes == 0 );
639- APInt ReadEnd = It->OffsetFromLeader + SzBytes;
640- // Allow redundancy: partial or full overlap counts as contiguous.
641- bool AreContiguous = false ;
642- if (It->OffsetFromLeader .sle (PrevReadEnd)) {
643- uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader ).getZExtValue ();
644- if (Overlap % ElemBytes == 0 )
645- AreContiguous = true ;
646- }
647-
648- LLVM_DEBUG (dbgs () << " LSV: Instruction is "
649- << (AreContiguous ? " contiguous" : " chain-breaker" )
650- << *It->Inst << " (starts at offset "
639+ bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
640+ LLVM_DEBUG (dbgs () << " LSV: Instructions are "
641+ << (AreContiguous ? " " : " not " ) << " contiguous: "
642+ << *Prev.Inst << " (ends at offset " << PrevReadEnd
643+ << " ) -> " << *It->Inst << " (starts at offset "
651644 << It->OffsetFromLeader << " )\n " );
652-
653645 if (AreContiguous)
654646 CurChain.push_back (*It);
655647 else
656648 Ret.push_back ({*It});
657- PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
658649 }
659650
660651 // Filter out length-1 chains, these are uninteresting.
@@ -883,24 +874,15 @@ bool Vectorizer::vectorizeChain(Chain &C) {
883874 Type *VecElemTy = getChainElemTy (C);
884875 bool IsLoadChain = isa<LoadInst>(C[0 ].Inst );
885876 unsigned AS = getLoadStoreAddressSpace (C[0 ].Inst );
886- unsigned BytesAdded = DL.getTypeStoreSize (getLoadStoreType (&*C[0 ].Inst ));
887- APInt PrevReadEnd = C[0 ].OffsetFromLeader + BytesAdded;
888- unsigned ChainBytes = BytesAdded;
889- for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
890- unsigned SzBytes = DL.getTypeStoreSize (getLoadStoreType (&*It->Inst ));
891- APInt ReadEnd = It->OffsetFromLeader + SzBytes;
892- // Update ChainBytes considering possible overlap.
893- BytesAdded =
894- PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
895- ChainBytes += BytesAdded;
896- PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
897- }
898-
877+ unsigned ChainBytes = std::accumulate (
878+ C.begin (), C.end (), 0u , [&](unsigned Bytes, const ChainElem &E) {
879+ return Bytes + DL.getTypeStoreSize (getLoadStoreType (E.Inst ));
880+ });
899881 assert (ChainBytes % DL.getTypeStoreSize (VecElemTy) == 0 );
900882 // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
901883 // than 1 byte (e.g. VecTy == <32 x i1>).
902- unsigned NumElem = 8 * ChainBytes / DL. getTypeSizeInBits (VecElemTy);
903- Type *VecTy = FixedVectorType::get ( VecElemTy, NumElem );
884+ Type *VecTy = FixedVectorType::get (
885+ VecElemTy, 8 * ChainBytes / DL. getTypeSizeInBits (VecElemTy) );
904886
905887 Align Alignment = getLoadStoreAlignment (C[0 ].Inst );
906888 // If this is a load/store of an alloca, we might have upgraded the alloca's
@@ -927,31 +909,27 @@ bool Vectorizer::vectorizeChain(Chain &C) {
927909 llvm::min_element (C, [](const auto &A, const auto &B) {
928910 return A.Inst ->comesBefore (B.Inst );
929911 })->Inst );
930- // This can happen due to a chain of redundant loads.
931- // In this case, just use the element-type, and avoid ExtractElement.
932- if (NumElem == 1 )
933- VecTy = VecElemTy;
912+
934913 // Chain is in offset order, so C[0] is the instr with the lowest offset,
935914 // i.e. the root of the vector.
936915 VecInst = Builder.CreateAlignedLoad (VecTy,
937916 getLoadStorePointerOperand (C[0 ].Inst ),
938917 Alignment);
939918
919+ unsigned VecIdx = 0 ;
940920 for (const ChainElem &E : C) {
941921 Instruction *I = E.Inst ;
942922 Value *V;
943923 Type *T = getLoadStoreType (I);
944- int EOffset = (E.OffsetFromLeader - C[0 ].OffsetFromLeader ).getSExtValue ();
945- int VecIdx = 8 * EOffset / DL.getTypeSizeInBits (VecElemTy);
946924 if (auto *VT = dyn_cast<FixedVectorType>(T)) {
947925 auto Mask = llvm::to_vector<8 >(
948926 llvm::seq<int >(VecIdx, VecIdx + VT->getNumElements ()));
949927 V = Builder.CreateShuffleVector (VecInst, Mask, I->getName ());
950- } else if (VecTy != VecElemTy) {
928+ VecIdx += VT->getNumElements ();
929+ } else {
951930 V = Builder.CreateExtractElement (VecInst, Builder.getInt32 (VecIdx),
952931 I->getName ());
953- } else {
954- V = VecInst;
932+ ++VecIdx;
955933 }
956934 if (V->getType () != I->getType ())
957935 V = Builder.CreateBitOrPointerCast (V, I->getType ());
@@ -986,24 +964,22 @@ bool Vectorizer::vectorizeChain(Chain &C) {
986964
987965 // Build the vector to store.
988966 Value *Vec = PoisonValue::get (VecTy);
989- auto InsertElem = [&](Value *V, unsigned VecIdx) {
967+ unsigned VecIdx = 0 ;
968+ auto InsertElem = [&](Value *V) {
990969 if (V->getType () != VecElemTy)
991970 V = Builder.CreateBitOrPointerCast (V, VecElemTy);
992- Vec = Builder.CreateInsertElement (Vec, V, Builder.getInt32 (VecIdx));
971+ Vec = Builder.CreateInsertElement (Vec, V, Builder.getInt32 (VecIdx++ ));
993972 };
994973 for (const ChainElem &E : C) {
995974 auto *I = cast<StoreInst>(E.Inst );
996- int EOffset = (E.OffsetFromLeader - C[0 ].OffsetFromLeader ).getSExtValue ();
997- int VecIdx = 8 * EOffset / DL.getTypeSizeInBits (VecElemTy);
998975 if (FixedVectorType *VT =
999976 dyn_cast<FixedVectorType>(getLoadStoreType (I))) {
1000977 for (int J = 0 , JE = VT->getNumElements (); J < JE; ++J) {
1001978 InsertElem (Builder.CreateExtractElement (I->getValueOperand (),
1002- Builder.getInt32 (J)),
1003- VecIdx++);
979+ Builder.getInt32 (J)));
1004980 }
1005981 } else {
1006- InsertElem (I->getValueOperand (), VecIdx );
982+ InsertElem (I->getValueOperand ());
1007983 }
1008984 }
1009985
0 commit comments