@@ -626,38 +626,26 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
626626 std::vector<Chain> Ret;
627627 Ret.push_back ({C.front ()});
628628
629- unsigned ChainElemTyBits = DL.getTypeSizeInBits (getChainElemTy (C));
630- APInt PrevReadEnd = C[0 ].OffsetFromLeader +
631- DL.getTypeStoreSize (getLoadStoreType (&*C[0 ].Inst ));
632629 for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
630+ // `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
633631 auto &CurChain = Ret.back ();
634- unsigned SzBytes = DL.getTypeStoreSize (getLoadStoreType (&*It->Inst ));
632+ const ChainElem &Prev = CurChain.back ();
633+ unsigned SzBits = DL.getTypeSizeInBits (getLoadStoreType (&*Prev.Inst ));
634+ assert (SzBits % 8 == 0 && " Non-byte sizes should have been filtered out by "
635+ " collectEquivalenceClass" );
636+ APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8 ;
635637
636638 // Add this instruction to the end of the current chain, or start a new one.
637- assert (
638- 8 * SzBytes % ChainElemTyBits == 0 &&
639- " Every chain-element size must be a multiple of the element size after "
640- " vectorization." );
641- APInt ReadEnd = It->OffsetFromLeader + SzBytes;
642- // Allow redundancy: partial or full overlap counts as contiguous.
643- bool AreContiguous = false ;
644- if (It->OffsetFromLeader .sle (PrevReadEnd)) {
645- // Check overlap is a multiple of the element size after vectorization.
646- uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader ).getZExtValue ();
647- if (8 * Overlap % ChainElemTyBits == 0 )
648- AreContiguous = true ;
649- }
650-
651- LLVM_DEBUG (dbgs () << " LSV: Instruction is "
652- << (AreContiguous ? " contiguous" : " chain-breaker" )
653- << *It->Inst << " (starts at offset "
639+ bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
640+ LLVM_DEBUG (dbgs () << " LSV: Instructions are "
641+ << (AreContiguous ? " " : " not " ) << " contiguous: "
642+ << *Prev.Inst << " (ends at offset " << PrevReadEnd
643+ << " ) -> " << *It->Inst << " (starts at offset "
654644 << It->OffsetFromLeader << " )\n " );
655-
656645 if (AreContiguous)
657646 CurChain.push_back (*It);
658647 else
659648 Ret.push_back ({*It});
660- PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
661649 }
662650
663651 // Filter out length-1 chains, these are uninteresting.
@@ -739,20 +727,14 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
739727 // These chains are over the closed interval [CBegin, CEnd].
740728 SmallVector<std::pair<unsigned /* CEnd*/ , unsigned /* SizeBytes*/ >, 8 >
741729 CandidateChains;
742- // Need to compute the size of every candidate chain from its beginning
743- // because of possible overlapping among chain elements.
744- unsigned Sz = DL.getTypeStoreSize (getLoadStoreType (C[CBegin].Inst ));
745- APInt PrevReadEnd = C[CBegin].OffsetFromLeader + Sz;
746730 for (unsigned CEnd = CBegin + 1 , Size = C.size (); CEnd < Size; ++CEnd) {
747- APInt ReadEnd = C[CEnd].OffsetFromLeader +
748- DL.getTypeStoreSize (getLoadStoreType (C[CEnd].Inst ));
749- unsigned BytesAdded =
750- PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
751- Sz += BytesAdded;
752- if (Sz > VecRegBytes)
731+ APInt Sz = C[CEnd].OffsetFromLeader +
732+ DL.getTypeStoreSize (getLoadStoreType (C[CEnd].Inst )) -
733+ C[CBegin].OffsetFromLeader ;
734+ if (Sz.sgt (VecRegBytes))
753735 break ;
754- CandidateChains.emplace_back (CEnd, Sz);
755- PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd );
736+ CandidateChains.emplace_back (CEnd,
737+ static_cast < unsigned >(Sz. getLimitedValue ()) );
756738 }
757739
758740 // Consider the longest chain first.
@@ -892,24 +874,15 @@ bool Vectorizer::vectorizeChain(Chain &C) {
892874 Type *VecElemTy = getChainElemTy (C);
893875 bool IsLoadChain = isa<LoadInst>(C[0 ].Inst );
894876 unsigned AS = getLoadStoreAddressSpace (C[0 ].Inst );
895- unsigned BytesAdded = DL.getTypeStoreSize (getLoadStoreType (&*C[0 ].Inst ));
896- APInt PrevReadEnd = C[0 ].OffsetFromLeader + BytesAdded;
897- unsigned ChainBytes = BytesAdded;
898- for (auto It = std::next (C.begin ()), End = C.end (); It != End; ++It) {
899- unsigned SzBytes = DL.getTypeStoreSize (getLoadStoreType (&*It->Inst ));
900- APInt ReadEnd = It->OffsetFromLeader + SzBytes;
901- // Update ChainBytes considering possible overlap.
902- BytesAdded =
903- PrevReadEnd.sle (ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue () : 0 ;
904- ChainBytes += BytesAdded;
905- PrevReadEnd = APIntOps::smax (PrevReadEnd, ReadEnd);
906- }
907-
908- assert (8 * ChainBytes % DL.getTypeSizeInBits (VecElemTy) == 0 );
877+ unsigned ChainBytes = std::accumulate (
878+ C.begin (), C.end (), 0u , [&](unsigned Bytes, const ChainElem &E) {
879+ return Bytes + DL.getTypeStoreSize (getLoadStoreType (E.Inst ));
880+ });
881+ assert (ChainBytes % DL.getTypeStoreSize (VecElemTy) == 0 );
909882 // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
910883 // than 1 byte (e.g. VecTy == <32 x i1>).
911- unsigned NumElem = 8 * ChainBytes / DL. getTypeSizeInBits (VecElemTy);
912- Type *VecTy = FixedVectorType::get ( VecElemTy, NumElem );
884+ Type *VecTy = FixedVectorType::get (
885+ VecElemTy, 8 * ChainBytes / DL. getTypeSizeInBits (VecElemTy) );
913886
914887 Align Alignment = getLoadStoreAlignment (C[0 ].Inst );
915888 // If this is a load/store of an alloca, we might have upgraded the alloca's
@@ -936,32 +909,27 @@ bool Vectorizer::vectorizeChain(Chain &C) {
936909 llvm::min_element (C, [](const auto &A, const auto &B) {
937910 return A.Inst ->comesBefore (B.Inst );
938911 })->Inst );
939- // This can happen due to a chain of redundant loads.
940- // In this case, just use the element-type, and avoid ExtractElement.
941- if (NumElem == 1 )
942- VecTy = VecElemTy;
912+
943913 // Chain is in offset order, so C[0] is the instr with the lowest offset,
944914 // i.e. the root of the vector.
945915 VecInst = Builder.CreateAlignedLoad (VecTy,
946916 getLoadStorePointerOperand (C[0 ].Inst ),
947917 Alignment);
948918
919+ unsigned VecIdx = 0 ;
949920 for (const ChainElem &E : C) {
950921 Instruction *I = E.Inst ;
951922 Value *V;
952923 Type *T = getLoadStoreType (I);
953- unsigned EOffset =
954- (E.OffsetFromLeader - C[0 ].OffsetFromLeader ).getZExtValue ();
955- unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits (VecElemTy);
956924 if (auto *VT = dyn_cast<FixedVectorType>(T)) {
957925 auto Mask = llvm::to_vector<8 >(
958926 llvm::seq<int >(VecIdx, VecIdx + VT->getNumElements ()));
959927 V = Builder.CreateShuffleVector (VecInst, Mask, I->getName ());
960- } else if (VecTy != VecElemTy) {
928+ VecIdx += VT->getNumElements ();
929+ } else {
961930 V = Builder.CreateExtractElement (VecInst, Builder.getInt32 (VecIdx),
962931 I->getName ());
963- } else {
964- V = VecInst;
932+ ++VecIdx;
965933 }
966934 if (V->getType () != I->getType ())
967935 V = Builder.CreateBitOrPointerCast (V, I->getType ());
@@ -996,25 +964,22 @@ bool Vectorizer::vectorizeChain(Chain &C) {
996964
997965 // Build the vector to store.
998966 Value *Vec = PoisonValue::get (VecTy);
999- auto InsertElem = [&](Value *V, unsigned VecIdx) {
967+ unsigned VecIdx = 0 ;
968+ auto InsertElem = [&](Value *V) {
1000969 if (V->getType () != VecElemTy)
1001970 V = Builder.CreateBitOrPointerCast (V, VecElemTy);
1002- Vec = Builder.CreateInsertElement (Vec, V, Builder.getInt32 (VecIdx));
971+ Vec = Builder.CreateInsertElement (Vec, V, Builder.getInt32 (VecIdx++ ));
1003972 };
1004973 for (const ChainElem &E : C) {
1005974 auto *I = cast<StoreInst>(E.Inst );
1006- unsigned EOffset =
1007- (E.OffsetFromLeader - C[0 ].OffsetFromLeader ).getZExtValue ();
1008- unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits (VecElemTy);
1009975 if (FixedVectorType *VT =
1010976 dyn_cast<FixedVectorType>(getLoadStoreType (I))) {
1011977 for (int J = 0 , JE = VT->getNumElements (); J < JE; ++J) {
1012978 InsertElem (Builder.CreateExtractElement (I->getValueOperand (),
1013- Builder.getInt32 (J)),
1014- VecIdx++);
979+ Builder.getInt32 (J)));
1015980 }
1016981 } else {
1017- InsertElem (I->getValueOperand (), VecIdx );
982+ InsertElem (I->getValueOperand ());
1018983 }
1019984 }
1020985
0 commit comments