@@ -3913,6 +3913,14 @@ class BoUpSLP {
39133913 bool areAltOperandsProfitable(const InstructionsState &S,
39143914 ArrayRef<Value *> VL) const;
39153915
3916+ /// Checks if the specified list of the instructions/values can be vectorized
3917+ /// in general.
      /// \param S [out] Overwritten with the result of getSameOpcode() for \p VL.
      /// \param TryToFindDuplicates [out] Starts as true; cleared on the failure
      /// paths where the caller must not try to pack duplicated scalars before
      /// building the non-vectorized node.
      /// \param TrySplitVectorize [out] Starts as false; set when the check fails
      /// with no common instruction state, so the caller may still try to build
      /// a split node before giving up.
      /// \returns true if none of the checks rejected \p VL, false otherwise.
3918+ bool isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
3919+ const EdgeInfo &UserTreeIdx,
3920+ InstructionsState &S,
3921+ bool &TryToFindDuplicates,
3922+ bool &TrySplitVectorize) const;
3923+
39163924 /// Checks if the specified list of the instructions/values can be vectorized
39173925 /// and fills required data before actual scheduling of the instructions.
39183926 TreeEntry::EntryState
@@ -9329,35 +9337,25 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
93299337 return true;
93309338}
93319339
9332- void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9333- const EdgeInfo &UserTreeIdx,
9334- unsigned InterleaveFactor) {
9340+ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9341+ const EdgeInfo &UserTreeIdx,
9342+ InstructionsState &S,
9343+ bool &TryToFindDuplicates,
9344+ bool &TrySplitVectorize) const {
93359345 assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
93369346
9337- SmallVector<int> ReuseShuffleIndices;
9338- SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
9339- auto TryToFindDuplicates = [&](const InstructionsState &S,
9340- bool DoNotFail = false) {
9341- if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
9342- S, UserTreeIdx, DoNotFail)) {
9343- VL = NonUniqueValueVL;
9344- return true;
9345- }
9346- auto Invalid = ScheduleBundle::invalid();
9347- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9348- return false;
9349- };
9350-
9351- InstructionsState S = getSameOpcode(VL, *TLI);
9347+ S = getSameOpcode(VL, *TLI);
9348+ TryToFindDuplicates = true;
9349+ TrySplitVectorize = false;
93529350
93539351 // Don't go into catchswitch blocks, which can happen with PHIs.
93549352 // Such blocks can only have PHIs and the catchswitch. There is no
93559353 // place to insert a shuffle if we need to, so just avoid that issue.
93569354 if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
93579355 LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
9358- auto Invalid = ScheduleBundle::invalid();
9359- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx) ;
9360- return;
9356+ // Do not try to pack duplicates, to avoid emitting extra instructions here.
9357+ TryToFindDuplicates = false ;
9358+ return false ;
93619359 }
93629360
93639361 // Check if this is a duplicate of another entry.
@@ -9367,24 +9365,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
93679365 if (E->isSame(VL)) {
93689366 LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
93699367 << ".\n");
9370- if (TryToFindDuplicates(S)) {
9371- auto Invalid = ScheduleBundle::invalid();
9372- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9373- ReuseShuffleIndices);
9374- }
9375- return;
9368+ return false;
93769369 }
93779370 SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
93789371 if (all_of(VL, [&](Value *V) {
93799372 return isa<PoisonValue>(V) || Values.contains(V);
93809373 })) {
93819374 LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
9382- if (TryToFindDuplicates(S)) {
9383- auto Invalid = ScheduleBundle::invalid();
9384- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9385- ReuseShuffleIndices);
9386- }
9387- return;
9375+ return false;
93889376 }
93899377 }
93909378 }
@@ -9401,75 +9389,31 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
94019389 cast<Instruction>(I)->getOpcode() == S.getOpcode();
94029390 })))) {
94039391 LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
9404- if (TryToFindDuplicates(S)) {
9405- auto Invalid = ScheduleBundle::invalid();
9406- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9407- ReuseShuffleIndices);
9408- }
9409- return;
9392+ return false;
94109393 }
94119394
94129395 // Don't handle scalable vectors
94139396 if (S && S.getOpcode() == Instruction::ExtractElement &&
94149397 isa<ScalableVectorType>(
94159398 cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
94169399 LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
9417- if (TryToFindDuplicates(S)) {
9418- auto Invalid = ScheduleBundle::invalid();
9419- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9420- ReuseShuffleIndices);
9421- }
9422- return;
9400+ return false;
94239401 }
94249402
94259403 // Don't handle vectors.
94269404 if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
94279405 LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
9428- auto Invalid = ScheduleBundle::invalid();
9429- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx) ;
9430- return;
9406+ // Do not try to pack duplicates, to avoid emitting extra instructions here.
9407+ TryToFindDuplicates = false ;
9408+ return false ;
94319409 }
94329410
9433- // Tries to build split node.
9434- auto TrySplitNode = [&](const InstructionsState &LocalState) {
9435- SmallVector<Value *> Op1, Op2;
9436- OrdersType ReorderIndices;
9437- if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
9438- return false;
9439-
9440- SmallVector<Value *> NewVL(VL.size());
9441- copy(Op1, NewVL.begin());
9442- copy(Op2, std::next(NewVL.begin(), Op1.size()));
9443- auto Invalid = ScheduleBundle::invalid();
9444- auto *TE = newTreeEntry(VL, TreeEntry::SplitVectorize, Invalid, LocalState,
9445- UserTreeIdx, {}, ReorderIndices);
9446- LLVM_DEBUG(dbgs() << "SLP: split alternate node.\n"; TE->dump());
9447- auto AddNode = [&](ArrayRef<Value *> Op, unsigned Idx) {
9448- InstructionsState S = getSameOpcode(Op, *TLI);
9449- if (S && (isa<LoadInst>(S.getMainOp()) ||
9450- getSameValuesTreeEntry(S.getMainOp(), Op, /*SameVF=*/true))) {
9451- // Build gather node for loads, they will be gathered later.
9452- TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9453- Idx == 0 ? 0 : Op1.size());
9454- (void)newTreeEntry(Op, TreeEntry::NeedToGather, Invalid, S, {TE, Idx});
9455- } else {
9456- TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9457- Idx == 0 ? 0 : Op1.size());
9458- buildTree_rec(Op, Depth, {TE, Idx});
9459- }
9460- };
9461- AddNode(Op1, 0);
9462- AddNode(Op2, 1);
9463- return true;
9464- };
9465-
94669411 // If all of the operands are identical or constant we have a simple solution.
94679412 // If we deal with insert/extract instructions, they all must have constant
94689413 // indices, otherwise we should gather them, not try to vectorize.
94699414 // If alternate op node with 2 elements with gathered operands - do not
94709415 // vectorize.
9471- auto &&NotProfitableForVectorization = [&S, this,
9472- Depth](ArrayRef<Value *> VL) {
9416+ auto NotProfitableForVectorization = [&S, this, Depth](ArrayRef<Value *> VL) {
94739417 if (!S || !S.isAltShuffle() || VL.size() > 2)
94749418 return false;
94759419 if (VectorizableTree.size() < MinTreeSize)
@@ -9549,18 +9493,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
95499493 !all_of(VL, isVectorLikeInstWithConstOps)) ||
95509494 NotProfitableForVectorization(VL)) {
95519495 if (!S) {
9552- auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
9553- // Last chance to try to vectorize alternate node.
9554- if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
9555- return;
9496+ LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
9497+ "C,S,B,O, small shuffle. \n");
9498+ TrySplitVectorize = true;
9499+ return false ;
95569500 }
95579501 LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
9558- if (TryToFindDuplicates(S)) {
9559- auto Invalid = ScheduleBundle::invalid();
9560- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9561- ReuseShuffleIndices);
9562- }
9563- return;
9502+ return false;
95649503 }
95659504
95669505 // Don't vectorize ephemeral values.
@@ -9569,9 +9508,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
95699508 if (EphValues.count(V)) {
95709509 LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
95719510 << ") is ephemeral.\n");
9572- auto Invalid = ScheduleBundle::invalid();
9573- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx) ;
9574- return;
9511+ // Do not try to pack duplicates, to avoid emitting extra instructions here.
9512+ TryToFindDuplicates = false ;
9513+ return false ;
95759514 }
95769515 }
95779516 }
@@ -9620,12 +9559,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
96209559 if (PreferScalarize) {
96219560 LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
96229561 "node is not profitable.\n");
9623- if (TryToFindDuplicates(S)) {
9624- auto Invalid = ScheduleBundle::invalid();
9625- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9626- ReuseShuffleIndices);
9627- }
9628- return;
9562+ return false;
96299563 }
96309564 }
96319565
@@ -9634,12 +9568,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
96349568 for (Value *V : VL) {
96359569 if (UserIgnoreList->contains(V)) {
96369570 LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
9637- if (TryToFindDuplicates(S)) {
9638- auto Invalid = ScheduleBundle::invalid();
9639- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9640- ReuseShuffleIndices);
9641- }
9642- return;
9571+ return false;
96439572 }
96449573 }
96459574 }
@@ -9669,8 +9598,79 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
96699598 // Do not vectorize EH and non-returning blocks, not profitable in most
96709599 // cases.
96719600 LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
9601+ return false;
9602+ }
9603+ return true;
9604+ }
9605+
9606+ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9607+ const EdgeInfo &UserTreeIdx,
9608+ unsigned InterleaveFactor) {
9609+ assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
9610+
9611+ SmallVector<int> ReuseShuffleIndices;
9612+ SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
9613+ auto TryToFindDuplicates = [&](const InstructionsState &S,
9614+ bool DoNotFail = false) {
      // Helper for the gather paths: forwards to tryToFindDuplicates() using the
      // local copy of the scalars (NonUniqueValueVL) and ReuseShuffleIndices.
      // On success VL is rebound to NonUniqueValueVL and true is returned.
      // NOTE(review): presumably tryToFindDuplicates() rewrites NonUniqueValueVL
      // and fills ReuseShuffleIndices - confirm against its definition.
9615+ if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
9616+ S, UserTreeIdx, DoNotFail)) {
9617+ VL = NonUniqueValueVL;
9618+ return true;
9619+ }
      // Failure: the not-vectorized entry is recorded here (without reuse
      // indices); the visible caller only adds its own entry when this returns
      // true.
96729620 auto Invalid = ScheduleBundle::invalid();
96739621 newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9622+ return false;
9623+ };
9624+
9625+ InstructionsState S = InstructionsState::invalid();
9626+ // Tries to build split node.
      // Returns false, creating nothing, when canBuildSplitNode() rejects VL for
      // the given LocalState; otherwise creates a SplitVectorize entry plus one
      // child entry per operand half (Op1, Op2) and returns true.
9627+ auto TrySplitNode = [&](const InstructionsState &LocalState) {
9628+ SmallVector<Value *> Op1, Op2;
9629+ OrdersType ReorderIndices;
9630+ if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
9631+ return false;
9632+
      // NOTE(review): NewVL is populated with Op1 followed by Op2 but is not
      // referenced again inside this lambda (the entry below is created from
      // VL) - confirm whether it is still needed.
9633+ SmallVector<Value *> NewVL(VL.size());
9634+ copy(Op1, NewVL.begin());
9635+ copy(Op2, std::next(NewVL.begin(), Op1.size()));
9636+ auto Invalid = ScheduleBundle::invalid();
9637+ auto *TE = newTreeEntry(VL, TreeEntry::SplitVectorize, Invalid, LocalState,
9638+ UserTreeIdx, {}, ReorderIndices);
9639+ LLVM_DEBUG(dbgs() << "SLP: split alternate node.\n"; TE->dump());
      // Adds the child node for one half. Idx is the operand index within TE
      // (0 for Op1, 1 for Op2); the recorded offset is 0 for the first half and
      // Op1.size() for the second.
9640+ auto AddNode = [&](ArrayRef<Value *> Op, unsigned Idx) {
      // This S is local to AddNode and describes only this half.
9641+ InstructionsState S = getSameOpcode(Op, *TLI);
9642+ if (S && (isa<LoadInst>(S.getMainOp()) ||
9643+ getSameValuesTreeEntry(S.getMainOp(), Op, /*SameVF=*/true))) {
9644+ // Build gather node for loads, they will be gathered later.
      // The same branch is taken when getSameValuesTreeEntry() finds an entry
      // for these values.
9645+ TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9646+ Idx == 0 ? 0 : Op1.size());
9647+ (void)newTreeEntry(Op, TreeEntry::NeedToGather, Invalid, S, {TE, Idx});
9648+ } else {
      // Otherwise recurse on this half at the same Depth, with TE as the user.
9649+ TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9650+ Idx == 0 ? 0 : Op1.size());
9651+ buildTree_rec(Op, Depth, {TE, Idx});
9652+ }
9653+ };
9654+ AddNode(Op1, 0);
9655+ AddNode(Op2, 1);
9656+ return true;
9657+ };
9658+
9659+ bool TryToPackDuplicates;
9660+ bool TrySplitVectorize;
9661+ if (!isLegalToVectorizeScalars(VL, Depth, UserTreeIdx, S, TryToPackDuplicates,
9662+ TrySplitVectorize)) {
9663+ if (TrySplitVectorize) {
9664+ auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
9665+ // Last chance to try to vectorize alternate node.
9666+ if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
9667+ return;
9668+ }
9669+ if (!TryToPackDuplicates || TryToFindDuplicates(S)) {
9670+ auto Invalid = ScheduleBundle::invalid();
9671+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9672+ ReuseShuffleIndices);
9673+ }
96749674 return;
96759675 }
96769676
@@ -9683,6 +9683,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
96839683 return;
96849684
96859685 // Perform specific checks for each particular instruction kind.
9686+ bool IsScatterVectorizeUserTE =
9687+ UserTreeIdx.UserTE &&
9688+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
96869689 OrdersType CurrentOrder;
96879690 SmallVector<Value *> PointerOps;
96889691 TreeEntry::EntryState State = getScalarsVectorizationState(
@@ -9694,6 +9697,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
96949697 return;
96959698 }
96969699
9700+ Instruction *VL0 = S.getMainOp();
9701+ BasicBlock *BB = VL0->getParent();
96979702 auto &BSRef = BlocksSchedules[BB];
96989703 if (!BSRef)
96999704 BSRef = std::make_unique<BlockScheduling>(BB);
0 commit comments