|
104 | 104 | using namespace llvm; |
105 | 105 | using namespace llvm::PatternMatch; |
106 | 106 | using namespace slpvectorizer; |
| 107 | +using namespace std::placeholders; |
107 | 108 |
|
108 | 109 | #define SV_NAME "slp-vectorizer" |
109 | 110 | #define DEBUG_TYPE "SLP" |
@@ -4955,6 +4956,37 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, |
4955 | 4956 | return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); |
4956 | 4957 | } |
4957 | 4958 |
|
| 4959 | +/// Correctly creates insert_subvector, checking that the index is multiple of |
| 4960 | +/// the subvectors length. Otherwise, generates shuffle using \p Generator or |
| 4961 | +/// using default shuffle. |
| 4962 | +static Value *createInsertVector( |
| 4963 | + IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index, |
| 4964 | + function_ref<Value *(Value *, Value *, ArrayRef<int>)> Generator = {}) { |
| 4965 | + const unsigned SubVecVF = getNumElements(V->getType()); |
| 4966 | + if (Index % SubVecVF == 0) { |
| 4967 | + Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, |
| 4968 | + Builder.getInt64(Index)); |
| 4969 | + } else { |
| 4970 | + // Create shuffle, insertvector requires that index is multiple of |
| 4971 | + // the subvector length. |
| 4972 | + const unsigned VecVF = getNumElements(Vec->getType()); |
| 4973 | + SmallVector<int> Mask(VecVF, PoisonMaskElem); |
| 4974 | + std::iota(Mask.begin(), std::next(Mask.begin(), Index), 0); |
| 4975 | + for (unsigned I : seq<unsigned>(Index, SubVecVF)) |
| 4976 | + Mask[I] = I - Index + VecVF; |
| 4977 | + if (Generator) { |
| 4978 | + Vec = Generator(Vec, V, Mask); |
| 4979 | + } else { |
| 4980 | + // 1. Resize V to the size of Vec. |
| 4981 | + SmallVector<int> ResizeMask(VecVF, PoisonMaskElem); |
| 4982 | + std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0); |
| 4983 | + V = Builder.CreateShuffleVector(V, ResizeMask); |
| 4984 | + Vec = Builder.CreateShuffleVector(Vec, V, Mask); |
| 4985 | + } |
| 4986 | + } |
| 4987 | + return Vec; |
| 4988 | +} |
| 4989 | + |
4958 | 4990 | BoUpSLP::LoadsState |
4959 | 4991 | BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0, |
4960 | 4992 | SmallVectorImpl<unsigned> &Order, |
@@ -13883,9 +13915,8 @@ Value *BoUpSLP::gather( |
13883 | 13915 | Instruction *InsElt; |
13884 | 13916 | if (auto *VecTy = dyn_cast<FixedVectorType>(Scalar->getType())) { |
13885 | 13917 | assert(SLPReVec && "FixedVectorType is not expected."); |
13886 | | - Vec = InsElt = Builder.CreateInsertVector( |
13887 | | - Vec->getType(), Vec, Scalar, |
13888 | | - Builder.getInt64(Pos * VecTy->getNumElements())); |
| 13918 | + Vec = InsElt = cast<Instruction>(createInsertVector( |
| 13919 | + Builder, Vec, Scalar, Pos * getNumElements(VecTy))); |
13889 | 13920 | auto *II = dyn_cast<IntrinsicInst>(InsElt); |
13890 | 13921 | if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) |
13891 | 13922 | return Vec; |
@@ -14485,23 +14516,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { |
14485 | 14516 | V, SimplifyQuery(*R.DL)); |
14486 | 14517 | })); |
14487 | 14518 | unsigned InsertionIndex = Idx * ScalarTyNumElements; |
14488 | | - const unsigned SubVecVF = |
14489 | | - cast<FixedVectorType>(V->getType())->getNumElements(); |
14490 | | - if (InsertionIndex % SubVecVF == 0) { |
14491 | | - Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, |
14492 | | - Builder.getInt64(InsertionIndex)); |
14493 | | - } else { |
14494 | | - // Create shuffle, insertvector requires that index is multiple of |
14495 | | - // the subvectors length. |
14496 | | - const unsigned VecVF = |
14497 | | - cast<FixedVectorType>(Vec->getType())->getNumElements(); |
14498 | | - SmallVector<int> Mask(VecVF, PoisonMaskElem); |
14499 | | - std::iota(Mask.begin(), Mask.end(), 0); |
14500 | | - for (unsigned I : seq<unsigned>( |
14501 | | - InsertionIndex, (Idx + SubVecVF) * ScalarTyNumElements)) |
14502 | | - Mask[I] = I - Idx + VecVF; |
14503 | | - Vec = createShuffle(Vec, V, Mask); |
14504 | | - } |
| 14519 | + Vec = createInsertVector( |
| 14520 | + Builder, Vec, V, InsertionIndex, |
| 14521 | + std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2, |
| 14522 | + _3)); |
14505 | 14523 | if (!CommonMask.empty()) { |
14506 | 14524 | std::iota( |
14507 | 14525 | std::next(CommonMask.begin(), InsertionIndex), |
@@ -17747,7 +17765,6 @@ bool BoUpSLP::collectValuesToDemote( |
17747 | 17765 | BitWidth = std::max(BitWidth, BitWidth1); |
17748 | 17766 | return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2); |
17749 | 17767 | }; |
17750 | | - using namespace std::placeholders; |
17751 | 17768 | auto FinalAnalysis = [&]() { |
17752 | 17769 | if (!IsProfitableToDemote) |
17753 | 17770 | return false; |
|
0 commit comments