@@ -67,9 +67,10 @@ class VectorCombine {
6767public:
6868 VectorCombine (Function &F, const TargetTransformInfo &TTI,
6969 const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
70- const DataLayout *DL, bool TryEarlyFoldsOnly)
70+ const DataLayout *DL, TTI::TargetCostKind CostKind,
71+ bool TryEarlyFoldsOnly)
7172 : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
72- TryEarlyFoldsOnly (TryEarlyFoldsOnly) {}
73+ CostKind (CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
7374
7475 bool run ();
7576
@@ -81,6 +82,7 @@ class VectorCombine {
8182 AAResults &AA;
8283 AssumptionCache &AC;
8384 const DataLayout *DL;
85+ TTI::TargetCostKind CostKind;
8486
8587 // / If true, only perform beneficial early IR transforms. Do not introduce new
8688 // / vector operations.
@@ -91,40 +93,38 @@ class VectorCombine {
9193 // TODO: Direct calls from the top-level "run" loop use a plain "Instruction"
9294 // parameter. That should be updated to specific sub-classes because the
9395 // run loop was changed to dispatch on opcode.
94- bool vectorizeLoadInsert (Instruction &I, TTI::TargetCostKind CostKind );
95- bool widenSubvectorLoad (Instruction &I, TTI::TargetCostKind CostKind );
96+ bool vectorizeLoadInsert (Instruction &I);
97+ bool widenSubvectorLoad (Instruction &I);
9698 ExtractElementInst *getShuffleExtract (ExtractElementInst *Ext0,
9799 ExtractElementInst *Ext1,
98- TTI::TargetCostKind CostKind,
99100 unsigned PreferredExtractIndex) const ;
100101 bool isExtractExtractCheap (ExtractElementInst *Ext0, ExtractElementInst *Ext1,
101102 const Instruction &I,
102103 ExtractElementInst *&ConvertToShuffle,
103- TTI::TargetCostKind CostKind,
104104 unsigned PreferredExtractIndex);
105105 void foldExtExtCmp (ExtractElementInst *Ext0, ExtractElementInst *Ext1,
106106 Instruction &I);
107107 void foldExtExtBinop (ExtractElementInst *Ext0, ExtractElementInst *Ext1,
108108 Instruction &I);
109- bool foldExtractExtract (Instruction &I, TTI::TargetCostKind CostKind );
110- bool foldInsExtFNeg (Instruction &I, TTI::TargetCostKind CostKind );
111- bool foldInsExtVectorToShuffle (Instruction &I, TTI::TargetCostKind CostKind );
112- bool foldBitcastShuffle (Instruction &I, TTI::TargetCostKind CostKind );
113- bool scalarizeBinopOrCmp (Instruction &I, TTI::TargetCostKind CostKind );
114- bool scalarizeVPIntrinsic (Instruction &I, TTI::TargetCostKind CostKind );
115- bool foldExtractedCmps (Instruction &I, TTI::TargetCostKind CostKind );
109+ bool foldExtractExtract (Instruction &I);
110+ bool foldInsExtFNeg (Instruction &I);
111+ bool foldInsExtVectorToShuffle (Instruction &I);
112+ bool foldBitcastShuffle (Instruction &I);
113+ bool scalarizeBinopOrCmp (Instruction &I);
114+ bool scalarizeVPIntrinsic (Instruction &I);
115+ bool foldExtractedCmps (Instruction &I);
116116 bool foldSingleElementStore (Instruction &I);
117- bool scalarizeLoadExtract (Instruction &I, TTI::TargetCostKind CostKind );
118- bool foldPermuteOfBinops (Instruction &I, TTI::TargetCostKind CostKind );
119- bool foldShuffleOfBinops (Instruction &I, TTI::TargetCostKind CostKind );
120- bool foldShuffleOfCastops (Instruction &I, TTI::TargetCostKind CostKind );
121- bool foldShuffleOfShuffles (Instruction &I, TTI::TargetCostKind CostKind );
122- bool foldShuffleOfIntrinsics (Instruction &I, TTI::TargetCostKind CostKind );
123- bool foldShuffleToIdentity (Instruction &I, TTI::TargetCostKind CostKind );
117+ bool scalarizeLoadExtract (Instruction &I);
118+ bool foldPermuteOfBinops (Instruction &I);
119+ bool foldShuffleOfBinops (Instruction &I);
120+ bool foldShuffleOfCastops (Instruction &I);
121+ bool foldShuffleOfShuffles (Instruction &I);
122+ bool foldShuffleOfIntrinsics (Instruction &I);
123+ bool foldShuffleToIdentity (Instruction &I);
124124 bool foldShuffleFromReductions (Instruction &I);
125- bool foldCastFromReductions (Instruction &I, TTI::TargetCostKind CostKind );
125+ bool foldCastFromReductions (Instruction &I);
126126 bool foldSelectShuffle (Instruction &I, bool FromReduction = false );
127- bool shrinkType (Instruction &I, TTI::TargetCostKind CostKind );
127+ bool shrinkType (Instruction &I);
128128
129129 void replaceValue (Value &Old, Value &New) {
130130 Old.replaceAllUsesWith (&New);
@@ -174,8 +174,7 @@ static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
174174 return true ;
175175}
176176
177- bool VectorCombine::vectorizeLoadInsert (Instruction &I,
178- TTI::TargetCostKind CostKind) {
177+ bool VectorCombine::vectorizeLoadInsert (Instruction &I) {
179178 // Match insert into fixed vector of scalar value.
180179 // TODO: Handle non-zero insert index.
181180 Value *Scalar;
@@ -295,8 +294,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I,
295294// / If we are loading a vector and then inserting it into a larger vector with
296295// / undefined elements, try to load the larger vector and eliminate the insert.
297296// / This removes a shuffle in IR and may allow combining of other loaded values.
298- bool VectorCombine::widenSubvectorLoad (Instruction &I,
299- TTI::TargetCostKind CostKind) {
297+ bool VectorCombine::widenSubvectorLoad (Instruction &I) {
300298 // Match subvector insert of fixed vector.
301299 auto *Shuf = cast<ShuffleVectorInst>(&I);
302300 if (!Shuf->isIdentityWithPadding ())
@@ -356,7 +354,6 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I,
356354// / followed by extract from a different index.
357355ExtractElementInst *VectorCombine::getShuffleExtract (
358356 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
359- TTI::TargetCostKind CostKind,
360357 unsigned PreferredExtractIndex = InvalidIndex) const {
361358 auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand ());
362359 auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand ());
@@ -408,7 +405,6 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
408405 ExtractElementInst *Ext1,
409406 const Instruction &I,
410407 ExtractElementInst *&ConvertToShuffle,
411- TTI::TargetCostKind CostKind,
412408 unsigned PreferredExtractIndex) {
413409 auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getIndexOperand ());
414410 auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getIndexOperand ());
@@ -478,8 +474,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
478474 !Ext1->hasOneUse () * Extract1Cost;
479475 }
480476
481- ConvertToShuffle =
482- getShuffleExtract (Ext0, Ext1, CostKind, PreferredExtractIndex);
477+ ConvertToShuffle = getShuffleExtract (Ext0, Ext1, PreferredExtractIndex);
483478 if (ConvertToShuffle) {
484479 if (IsBinOp && DisableBinopExtractShuffle)
485480 return true ;
@@ -593,8 +588,7 @@ void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
593588}
594589
595590// / Match an instruction with extracted vector operands.
596- bool VectorCombine::foldExtractExtract (Instruction &I,
597- TTI::TargetCostKind CostKind) {
591+ bool VectorCombine::foldExtractExtract (Instruction &I) {
598592 // It is not safe to transform things like div, urem, etc. because we may
599593 // create undefined behavior when executing those on unknown vector elements.
600594 if (!isSafeToSpeculativelyExecute (&I))
@@ -626,8 +620,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I,
626620 m_InsertElt (m_Value (), m_Value (), m_ConstantInt (InsertIndex)));
627621
628622 ExtractElementInst *ExtractToChange;
629- if (isExtractExtractCheap (Ext0, Ext1, I, ExtractToChange, CostKind,
630- InsertIndex))
623+ if (isExtractExtractCheap (Ext0, Ext1, I, ExtractToChange, InsertIndex))
631624 return false ;
632625
633626 if (ExtractToChange) {
@@ -654,8 +647,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I,
654647
655648// / Try to replace an extract + scalar fneg + insert with a vector fneg +
656649// / shuffle.
657- bool VectorCombine::foldInsExtFNeg (Instruction &I,
658- TTI::TargetCostKind CostKind) {
650+ bool VectorCombine::foldInsExtFNeg (Instruction &I) {
659651 // Match an insert (op (extract)) pattern.
660652 Value *DestVec;
661653 uint64_t Index;
@@ -718,8 +710,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I,
718710// / If this is a bitcast of a shuffle, try to bitcast the source vector to the
719711// / destination type followed by shuffle. This can enable further transforms by
720712// / moving bitcasts or shuffles together.
721- bool VectorCombine::foldBitcastShuffle (Instruction &I,
722- TTI::TargetCostKind CostKind) {
713+ bool VectorCombine::foldBitcastShuffle (Instruction &I) {
723714 Value *V0, *V1;
724715 ArrayRef<int > Mask;
725716 if (!match (&I, m_BitCast (m_OneUse (
@@ -808,8 +799,7 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I,
808799// / VP Intrinsics whose vector operands are both splat values may be simplified
809800// / into the scalar version of the operation and the result splatted. This
810801// / can lead to scalarization down the line.
811- bool VectorCombine::scalarizeVPIntrinsic (Instruction &I,
812- TTI::TargetCostKind CostKind) {
802+ bool VectorCombine::scalarizeVPIntrinsic (Instruction &I) {
813803 if (!isa<VPIntrinsic>(I))
814804 return false ;
815805 VPIntrinsic &VPI = cast<VPIntrinsic>(I);
@@ -929,8 +919,7 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I,
929919
930920// / Match a vector binop or compare instruction with at least one inserted
931921// / scalar operand and convert to scalar binop/cmp followed by insertelement.
932- bool VectorCombine::scalarizeBinopOrCmp (Instruction &I,
933- TTI::TargetCostKind CostKind) {
922+ bool VectorCombine::scalarizeBinopOrCmp (Instruction &I) {
934923 CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
935924 Value *Ins0, *Ins1;
936925 if (!match (&I, m_BinOp (m_Value (Ins0), m_Value (Ins1))) &&
@@ -1058,8 +1047,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I,
10581047// / Try to combine a scalar binop + 2 scalar compares of extracted elements of
10591048// / a vector into vector operations followed by extract. Note: The SLP pass
10601049// / may miss this pattern because of implementation problems.
1061- bool VectorCombine::foldExtractedCmps (Instruction &I,
1062- TTI::TargetCostKind CostKind) {
1050+ bool VectorCombine::foldExtractedCmps (Instruction &I) {
10631051 auto *BI = dyn_cast<BinaryOperator>(&I);
10641052
10651053 // We are looking for a scalar binop of booleans.
@@ -1337,8 +1325,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
13371325}
13381326
13391327// / Try to scalarize vector loads feeding extractelement instructions.
1340- bool VectorCombine::scalarizeLoadExtract (Instruction &I,
1341- TTI::TargetCostKind CostKind) {
1328+ bool VectorCombine::scalarizeLoadExtract (Instruction &I) {
13421329 Value *Ptr;
13431330 if (!match (&I, m_Load (m_Value (Ptr))))
13441331 return false ;
@@ -1434,8 +1421,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I,
14341421
14351422// / Try to convert "shuffle (binop (shuffle, shuffle)), undef"
14361423// / --> "binop (shuffle), (shuffle)".
1437- bool VectorCombine::foldPermuteOfBinops (Instruction &I,
1438- TTI::TargetCostKind CostKind) {
1424+ bool VectorCombine::foldPermuteOfBinops (Instruction &I) {
14391425 BinaryOperator *BinOp;
14401426 ArrayRef<int > OuterMask;
14411427 if (!match (&I,
@@ -1528,8 +1514,7 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I,
15281514}
15291515
15301516// / Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
1531- bool VectorCombine::foldShuffleOfBinops (Instruction &I,
1532- TTI::TargetCostKind CostKind) {
1517+ bool VectorCombine::foldShuffleOfBinops (Instruction &I) {
15331518 BinaryOperator *B0, *B1;
15341519 ArrayRef<int > OldMask;
15351520 if (!match (&I, m_Shuffle (m_OneUse (m_BinOp (B0)), m_OneUse (m_BinOp (B1)),
@@ -1616,8 +1601,7 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I,
16161601
16171602// / Try to convert "shuffle (castop), (castop)" with a shared castop operand
16181603// / into "castop (shuffle)".
1619- bool VectorCombine::foldShuffleOfCastops (Instruction &I,
1620- TTI::TargetCostKind CostKind) {
1604+ bool VectorCombine::foldShuffleOfCastops (Instruction &I) {
16211605 Value *V0, *V1;
16221606 ArrayRef<int > OldMask;
16231607 if (!match (&I, m_Shuffle (m_Value (V0), m_Value (V1), m_Mask (OldMask))))
@@ -1720,8 +1704,7 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I,
17201704
17211705// / Try to convert "shuffle (shuffle x, undef), (shuffle y, undef)"
17221706// / into "shuffle x, y".
1723- bool VectorCombine::foldShuffleOfShuffles (Instruction &I,
1724- TTI::TargetCostKind CostKind) {
1707+ bool VectorCombine::foldShuffleOfShuffles (Instruction &I) {
17251708 Value *V0, *V1;
17261709 UndefValue *U0, *U1;
17271710 ArrayRef<int > OuterMask, InnerMask0, InnerMask1;
@@ -1809,8 +1792,7 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I,
18091792
18101793// / Try to convert
18111794// / "shuffle (intrinsic), (intrinsic)" into "intrinsic (shuffle), (shuffle)".
1812- bool VectorCombine::foldShuffleOfIntrinsics (Instruction &I,
1813- TTI::TargetCostKind CostKind) {
1795+ bool VectorCombine::foldShuffleOfIntrinsics (Instruction &I) {
18141796 Value *V0, *V1;
18151797 ArrayRef<int > OldMask;
18161798 if (!match (&I, m_Shuffle (m_OneUse (m_Value (V0)), m_OneUse (m_Value (V1)),
@@ -2048,8 +2030,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
20482030// Starting from a shuffle, look up through operands tracking the shuffled index
20492031// of each lane. If we can simplify away the shuffles to identities then
20502032// do so.
2051- bool VectorCombine::foldShuffleToIdentity (Instruction &I,
2052- TTI::TargetCostKind CostKind) {
2033+ bool VectorCombine::foldShuffleToIdentity (Instruction &I) {
20532034 auto *Ty = dyn_cast<FixedVectorType>(I.getType ());
20542035 if (!Ty || I.use_empty ())
20552036 return false ;
@@ -2333,8 +2314,7 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
23332314// / reduce(trunc(x)) -> trunc(reduce(x)).
23342315// / reduce(sext(x)) -> sext(reduce(x)).
23352316// / reduce(zext(x)) -> zext(reduce(x)).
2336- bool VectorCombine::foldCastFromReductions (Instruction &I,
2337- TTI::TargetCostKind CostKind) {
2317+ bool VectorCombine::foldCastFromReductions (Instruction &I) {
23382318 auto *II = dyn_cast<IntrinsicInst>(&I);
23392319 if (!II)
23402320 return false ;
@@ -2718,7 +2698,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
27182698// / lshr((zext(x),y) -> zext(lshr(x,trunc(y)))
27192699// / Cost model calculations takes into account if zext(x) has other users and
27202700// / whether it can be propagated through them too.
2721- bool VectorCombine::shrinkType (Instruction &I, TTI::TargetCostKind CostKind ) {
2701+ bool VectorCombine::shrinkType (Instruction &I) {
27222702 Value *ZExted, *OtherOperand;
27232703 if (!match (&I, m_c_BitwiseLogic (m_ZExt (m_Value (ZExted)),
27242704 m_Value (OtherOperand))) &&
@@ -2809,8 +2789,7 @@ bool VectorCombine::shrinkType(Instruction &I, TTI::TargetCostKind CostKind) {
28092789
28102790// / insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
28112791// / shuffle (DstVec, SrcVec, Mask)
2812- bool VectorCombine::foldInsExtVectorToShuffle (Instruction &I,
2813- TTI::TargetCostKind CostKind) {
2792+ bool VectorCombine::foldInsExtVectorToShuffle (Instruction &I) {
28142793 Value *DstVec, *SrcVec;
28152794 uint64_t ExtIdx, InsIdx;
28162795 if (!match (&I,
@@ -2865,7 +2844,6 @@ bool VectorCombine::run() {
28652844 bool MadeChange = false ;
28662845 auto FoldInst = [this , &MadeChange](Instruction &I) {
28672846 Builder.SetInsertPoint (&I);
2868- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
28692847 bool IsFixedVectorType = isa<FixedVectorType>(I.getType ());
28702848 auto Opcode = I.getOpcode ();
28712849
@@ -2876,10 +2854,10 @@ bool VectorCombine::run() {
28762854 if (IsFixedVectorType) {
28772855 switch (Opcode) {
28782856 case Instruction::InsertElement:
2879- MadeChange |= vectorizeLoadInsert (I, CostKind );
2857+ MadeChange |= vectorizeLoadInsert (I);
28802858 break ;
28812859 case Instruction::ShuffleVector:
2882- MadeChange |= widenSubvectorLoad (I, CostKind );
2860+ MadeChange |= widenSubvectorLoad (I);
28832861 break ;
28842862 default :
28852863 break ;
@@ -2889,9 +2867,9 @@ bool VectorCombine::run() {
28892867 // This transform works with scalable and fixed vectors
28902868 // TODO: Identify and allow other scalable transforms
28912869 if (isa<VectorType>(I.getType ())) {
2892- MadeChange |= scalarizeBinopOrCmp (I, CostKind );
2893- MadeChange |= scalarizeLoadExtract (I, CostKind );
2894- MadeChange |= scalarizeVPIntrinsic (I, CostKind );
2870+ MadeChange |= scalarizeBinopOrCmp (I);
2871+ MadeChange |= scalarizeLoadExtract (I);
2872+ MadeChange |= scalarizeVPIntrinsic (I);
28952873 }
28962874
28972875 if (Opcode == Instruction::Store)
@@ -2908,39 +2886,39 @@ bool VectorCombine::run() {
29082886 if (IsFixedVectorType) {
29092887 switch (Opcode) {
29102888 case Instruction::InsertElement:
2911- MadeChange |= foldInsExtFNeg (I, CostKind );
2912- MadeChange |= foldInsExtVectorToShuffle (I, CostKind );
2889+ MadeChange |= foldInsExtFNeg (I);
2890+ MadeChange |= foldInsExtVectorToShuffle (I);
29132891 break ;
29142892 case Instruction::ShuffleVector:
2915- MadeChange |= foldPermuteOfBinops (I, CostKind );
2916- MadeChange |= foldShuffleOfBinops (I, CostKind );
2917- MadeChange |= foldShuffleOfCastops (I, CostKind );
2918- MadeChange |= foldShuffleOfShuffles (I, CostKind );
2919- MadeChange |= foldShuffleOfIntrinsics (I, CostKind );
2920- MadeChange |= foldSelectShuffle (I, CostKind );
2921- MadeChange |= foldShuffleToIdentity (I, CostKind );
2893+ MadeChange |= foldPermuteOfBinops (I);
2894+ MadeChange |= foldShuffleOfBinops (I);
2895+ MadeChange |= foldShuffleOfCastops (I);
2896+ MadeChange |= foldShuffleOfShuffles (I);
2897+ MadeChange |= foldShuffleOfIntrinsics (I);
2898+ MadeChange |= foldSelectShuffle (I);
2899+ MadeChange |= foldShuffleToIdentity (I);
29222900 break ;
29232901 case Instruction::BitCast:
2924- MadeChange |= foldBitcastShuffle (I, CostKind );
2902+ MadeChange |= foldBitcastShuffle (I);
29252903 break ;
29262904 default :
2927- MadeChange |= shrinkType (I, CostKind );
2905+ MadeChange |= shrinkType (I);
29282906 break ;
29292907 }
29302908 } else {
29312909 switch (Opcode) {
29322910 case Instruction::Call:
29332911 MadeChange |= foldShuffleFromReductions (I);
2934- MadeChange |= foldCastFromReductions (I, CostKind );
2912+ MadeChange |= foldCastFromReductions (I);
29352913 break ;
29362914 case Instruction::ICmp:
29372915 case Instruction::FCmp:
2938- MadeChange |= foldExtractExtract (I, CostKind );
2916+ MadeChange |= foldExtractExtract (I);
29392917 break ;
29402918 default :
29412919 if (Instruction::isBinaryOp (Opcode)) {
2942- MadeChange |= foldExtractExtract (I, CostKind );
2943- MadeChange |= foldExtractedCmps (I, CostKind );
2920+ MadeChange |= foldExtractExtract (I);
2921+ MadeChange |= foldExtractedCmps (I);
29442922 }
29452923 break ;
29462924 }
@@ -2982,7 +2960,8 @@ PreservedAnalyses VectorCombinePass::run(Function &F,
29822960 DominatorTree &DT = FAM.getResult <DominatorTreeAnalysis>(F);
29832961 AAResults &AA = FAM.getResult <AAManager>(F);
29842962 const DataLayout *DL = &F.getDataLayout ();
2985- VectorCombine Combiner (F, TTI, DT, AA, AC, DL, TryEarlyFoldsOnly);
2963+ VectorCombine Combiner (F, TTI, DT, AA, AC, DL, TTI::TCK_RecipThroughput,
2964+ TryEarlyFoldsOnly);
29862965 if (!Combiner.run ())
29872966 return PreservedAnalyses::all ();
29882967 PreservedAnalyses PA;
0 commit comments