@@ -4063,6 +4063,15 @@ class BoUpSLP {
40634063 }
40644064#endif
40654065
4066+ /// Create a new gather TreeEntry
4067+ TreeEntry *newGatherTreeEntry(ArrayRef<Value *> VL,
4068+ const InstructionsState &S,
4069+ const EdgeInfo &UserTreeIdx,
4070+ ArrayRef<int> ReuseShuffleIndices = {}) {
4071+ auto Invalid = ScheduleBundle::invalid();
4072+ return newTreeEntry(VL, Invalid, S, UserTreeIdx, ReuseShuffleIndices);
4073+ }
4074+
40664075 /// Create a new VectorizableTree entry.
40674076 TreeEntry *newTreeEntry(ArrayRef<Value *> VL, ScheduleBundle &Bundle,
40684077 const InstructionsState &S,
@@ -4251,13 +4260,34 @@ class BoUpSLP {
42514260 bool areAltOperandsProfitable(const InstructionsState &S,
42524261 ArrayRef<Value *> VL) const;
42534262
4263+ /// Contains all the outputs of legality analysis for a list of values to
4264+ /// vectorize.
4265+ class ScalarsVectorizationLegality {
4266+ InstructionsState S;
4267+ bool IsLegal;
4268+ bool TryToFindDuplicates;
4269+ bool TrySplitVectorize;
4270+
4271+ public:
4272+ ScalarsVectorizationLegality(InstructionsState S, bool IsLegal,
4273+ bool TryToFindDuplicates = true,
4274+ bool TrySplitVectorize = false)
4275+ : S(S), IsLegal(IsLegal), TryToFindDuplicates(TryToFindDuplicates),
4276+ TrySplitVectorize(TrySplitVectorize) {
4277+ assert((!IsLegal || (S.valid() && TryToFindDuplicates)) &&
4278+ "Inconsistent state");
4279+ }
4280+ const InstructionsState &getInstructionsState() const { return S; };
4281+ bool isLegal() const { return IsLegal; }
4282+ bool tryToFindDuplicates() const { return TryToFindDuplicates; }
4283+ bool trySplitVectorize() const { return TrySplitVectorize; }
4284+ };
4285+
42544286 /// Checks if the specified list of the instructions/values can be vectorized
42554287 /// in general.
4256- bool isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
4257- const EdgeInfo &UserTreeIdx,
4258- InstructionsState &S,
4259- bool &TryToFindDuplicates,
4260- bool &TrySplitVectorize) const;
4288+ ScalarsVectorizationLegality
4289+ getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
4290+ const EdgeInfo &UserTreeIdx) const;
42614291
42624292 /// Checks if the specified list of the instructions/values can be vectorized
42634293 /// and fills required data before actual scheduling of the instructions.
@@ -9734,25 +9764,21 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
97349764 return true;
97359765}
97369766
9737- bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9738- const EdgeInfo &UserTreeIdx,
9739- InstructionsState &S,
9740- bool &TryToFindDuplicates,
9741- bool &TrySplitVectorize) const {
9767+ BoUpSLP::ScalarsVectorizationLegality
9768+ BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
9769+ const EdgeInfo &UserTreeIdx) const {
97429770 assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
97439771
9744- S = getSameOpcode(VL, *TLI);
9745- TryToFindDuplicates = true;
9746- TrySplitVectorize = false;
9772+ InstructionsState S = getSameOpcode(VL, *TLI);
97479773
97489774 // Don't go into catchswitch blocks, which can happen with PHIs.
97499775 // Such blocks can only have PHIs and the catchswitch. There is no
97509776 // place to insert a shuffle if we need to, so just avoid that issue.
97519777 if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
97529778 LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
97539779 // Do not try to pack to avoid extra instructions here.
9754- TryToFindDuplicates = false;
9755- return false;
9780+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false,
9781+ /*TryToFindDuplicates=*/ false) ;
97569782 }
97579783
97589784 // Check if this is a duplicate of another entry.
@@ -9762,14 +9788,14 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
97629788 if (E->isSame(VL)) {
97639789 LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
97649790 << ".\n");
9765- return false;
9791+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
97669792 }
97679793 SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
97689794 if (all_of(VL, [&](Value *V) {
97699795 return isa<PoisonValue>(V) || Values.contains(V);
97709796 })) {
97719797 LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
9772- return false;
9798+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
97739799 }
97749800 }
97759801 }
@@ -9786,23 +9812,23 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
97869812 cast<Instruction>(I)->getOpcode() == S.getOpcode();
97879813 })))) {
97889814 LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
9789- return false;
9815+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
97909816 }
97919817
97929818 // Don't handle scalable vectors
97939819 if (S && S.getOpcode() == Instruction::ExtractElement &&
97949820 isa<ScalableVectorType>(
97959821 cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
97969822 LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
9797- return false;
9823+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
97989824 }
97999825
98009826 // Don't handle vectors.
98019827 if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
98029828 LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
98039829 // Do not try to pack to avoid extra instructions here.
9804- TryToFindDuplicates = false;
9805- return false;
9830+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false,
9831+ /*TryToFindDuplicates=*/ false) ;
98069832 }
98079833
98089834 // If all of the operands are identical or constant we have a simple solution.
@@ -9892,11 +9918,12 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
98929918 if (!S) {
98939919 LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
98949920 "C,S,B,O, small shuffle. \n");
9895- TrySplitVectorize = true;
9896- return false;
9921+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
9922+ /*TryToFindDuplicates=*/true,
9923+ /*TrySplitVectorize=*/true);
98979924 }
98989925 LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
9899- return false;
9926+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
99009927 }
99019928
99029929 // Don't vectorize ephemeral values.
@@ -9906,8 +9933,8 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
99069933 LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
99079934 << ") is ephemeral.\n");
99089935 // Do not try to pack to avoid extra instructions here.
9909- TryToFindDuplicates = false;
9910- return false;
9936+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false,
9937+ /*TryToFindDuplicates=*/ false) ;
99119938 }
99129939 }
99139940 }
@@ -9956,7 +9983,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
99569983 if (PreferScalarize) {
99579984 LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
99589985 "node is not profitable.\n");
9959- return false;
9986+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
99609987 }
99619988 }
99629989
@@ -9965,7 +9992,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
99659992 for (Value *V : VL) {
99669993 if (UserIgnoreList->contains(V)) {
99679994 LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
9968- return false;
9995+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
99699996 }
99709997 }
99719998 }
@@ -9995,9 +10022,9 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
999510022 // Do not vectorize EH and non-returning blocks, not profitable in most
999610023 // cases.
999710024 LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
9998- return false;
10025+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
999910026 }
10000- return true;
10027+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ true) ;
1000110028}
1000210029
1000310030void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
@@ -10008,7 +10035,6 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
1000810035 SmallVector<int> ReuseShuffleIndices;
1000910036 SmallVector<Value *> VL(VLRef.begin(), VLRef.end());
1001010037
10011- InstructionsState S = InstructionsState::invalid();
1001210038 // Tries to build split node.
1001310039 auto TrySplitNode = [&](const InstructionsState &LocalState) {
1001410040 SmallVector<Value *> Op1, Op2;
@@ -10042,22 +10068,20 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
1004210068 return true;
1004310069 };
1004410070
10045- bool TryToPackDuplicates;
10046- bool TrySplitVectorize ;
10047- if (!isLegalToVectorizeScalars(VL, Depth, UserTreeIdx, S, TryToPackDuplicates,
10048- TrySplitVectorize )) {
10049- if (TrySplitVectorize ) {
10071+ ScalarsVectorizationLegality Legality =
10072+ getScalarsVectorizationLegality(VL, Depth, UserTreeIdx) ;
10073+ const InstructionsState &S = Legality.getInstructionsState();
10074+ if (!Legality.isLegal( )) {
10075+ if (Legality.trySplitVectorize() ) {
1005010076 auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
1005110077 // Last chance to try to vectorize alternate node.
1005210078 if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
1005310079 return;
1005410080 }
10055- if (TryToPackDuplicates )
10081+ if (Legality.tryToFindDuplicates() )
1005610082 tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
1005710083
10058- auto Invalid = ScheduleBundle::invalid();
10059- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10060- ReuseShuffleIndices);
10084+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
1006110085 return;
1006210086 }
1006310087
@@ -10068,9 +10092,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
1006810092 // Check that every instruction appears once in this bundle.
1006910093 if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx,
1007010094 /*TryPad=*/true)) {
10071- auto Invalid = ScheduleBundle::invalid();
10072- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10073- ReuseShuffleIndices);
10095+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
1007410096 return;
1007510097 }
1007610098
@@ -10083,9 +10105,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
1008310105 TreeEntry::EntryState State = getScalarsVectorizationState(
1008410106 S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
1008510107 if (State == TreeEntry::NeedToGather) {
10086- auto Invalid = ScheduleBundle::invalid();
10087- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10088- ReuseShuffleIndices);
10108+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
1008910109 return;
1009010110 }
1009110111
@@ -10109,9 +10129,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
1010910129 // Last chance to try to vectorize alternate node.
1011010130 if (S.isAltShuffle() && ReuseShuffleIndices.empty() && TrySplitNode(S))
1011110131 return;
10112- auto Invalid = ScheduleBundle::invalid();
10113- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10114- ReuseShuffleIndices);
10132+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
1011510133 NonScheduledFirst.insert(VL.front());
1011610134 if (S.getOpcode() == Instruction::Load &&
1011710135 BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)
0 commit comments