@@ -4869,6 +4869,8 @@ class BoUpSLP {
48694869 assert(hasValidDependencies() &&
48704870 "increment of unscheduled deps would be meaningless");
48714871 UnscheduledDeps += Incr;
4872+ assert(UnscheduledDeps >= 0 &&
4873+ "Expected valid number of unscheduled deps");
48724874 return UnscheduledDeps;
48734875 }
48744876
@@ -5331,6 +5333,28 @@ class BoUpSLP {
53315333 // Check all tree entries, if they have operands replaced by copyable
53325334 // data.
53335335 for (TreeEntry *TE : Entries) {
5336+ unsigned Inc = 0;
5337+ bool IsNonSchedulableWithParentPhiNode =
5338+ TE->doesNotNeedToSchedule() && TE->UserTreeIndex &&
5339+ TE->UserTreeIndex.UserTE->hasState() &&
5340+ TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
5341+ // Count the unique phi nodes in the parent entry that have User as an
5342+ // incoming value; all other entries count once.
5343+ if (IsNonSchedulableWithParentPhiNode) {
5344+ SmallPtrSet<Value *, 4> ParentsUniqueUsers;
5345+ const TreeEntry *ParentTE = TE->UserTreeIndex.UserTE;
5346+ for (Value *V : ParentTE->Scalars) {
5347+ auto *PHI = dyn_cast<PHINode>(V);
5348+ if (!PHI)
5349+ continue;
5350+ if (ParentsUniqueUsers.insert(PHI).second &&
5351+ is_contained(PHI->incoming_values(), User))
5352+ ++Inc;
5353+ }
5354+ } else {
5355+ Inc = 1;
5356+ }
5357+
53345358 // Check if the user is commutative.
53355359 // The commutatives are handled later, as their operands can be
53365360 // reordered.
@@ -5346,11 +5370,11 @@ class BoUpSLP {
53465370 if (!getScheduleCopyableData(EI, Op))
53475371 continue;
53485372 // Found copyable operand - continue.
5349- ++ OpCnt;
5373+ OpCnt += Inc ;
53505374 continue;
53515375 }
5352- ++ PotentiallyReorderedEntriesCount.try_emplace(TE, 0)
5353- .first->getSecond();
5376+ PotentiallyReorderedEntriesCount.try_emplace(TE, 0)
5377+ .first->getSecond() += Inc ;
53545378 }
53555379 }
53565380 if (PotentiallyReorderedEntriesCount.empty())
@@ -5360,21 +5384,44 @@ class BoUpSLP {
53605384 });
53615385 // Check the commutative/cmp entries.
53625386 for (auto &P : PotentiallyReorderedEntriesCount) {
5387+ SmallPtrSet<Value *, 4> ParentsUniqueUsers;
5388+ bool IsNonSchedulableWithParentPhiNode =
5389+ P.first->doesNotNeedToSchedule() && P.first->UserTreeIndex &&
5390+ P.first->UserTreeIndex.UserTE->hasState() &&
5391+ P.first->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
53635392 auto *It = find(P.first->Scalars, User);
5364- assert(It != P.first->Scalars.end() && "User is not in the tree entry");
5365- int Lane = std::distance(P.first->Scalars.begin(), It);
5366- assert(Lane >= 0 && "Lane is not found");
5367- if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty())
5368- Lane = P.first->ReorderIndices[Lane];
5369- assert(Lane < static_cast<int>(P.first->Scalars.size()) &&
5370- "Couldn't find extract lane");
5371- for (unsigned OpIdx :
5372- seq<unsigned>(::getNumberOfPotentiallyCommutativeOps(
5373- P.first->getMainOp()))) {
5374- if (P.first->getOperand(OpIdx)[Lane] == Op &&
5375- getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op))
5376- --P.getSecond();
5377- }
5393+ do {
5394+ assert(It != P.first->Scalars.end() &&
5395+ "User is not in the tree entry");
5396+ int Lane = std::distance(P.first->Scalars.begin(), It);
5397+ assert(Lane >= 0 && "Lane is not found");
5398+ if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty())
5399+ Lane = P.first->ReorderIndices[Lane];
5400+ assert(Lane < static_cast<int>(P.first->Scalars.size()) &&
5401+ "Couldn't find extract lane");
5402+ // Visit each unique parent phi only once. NOTE(review): the local User
5403+ // below shadows the outer User that find() is given - confirm intended.
5404+ if (IsNonSchedulableWithParentPhiNode) {
5405+ const TreeEntry *ParentTE = P.first->UserTreeIndex.UserTE;
5406+ Value *User = ParentTE->Scalars[Lane];
5407+ if (!ParentsUniqueUsers.insert(User).second) {
5408+ It =
5409+ find(make_range(std::next(It), P.first->Scalars.end()), User);
5410+ continue;
5411+ }
5412+ }
5413+ for (unsigned OpIdx :
5414+ seq<unsigned>(::getNumberOfPotentiallyCommutativeOps(
5415+ P.first->getMainOp()))) {
5416+ if (P.first->getOperand(OpIdx)[Lane] == Op &&
5417+ getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op))
5418+ --P.getSecond();
5419+ }
5420+ // If parent node is schedulable, it will be handled correctly.
5421+ if (!IsNonSchedulableWithParentPhiNode)
5422+ break;
5423+ It = find(make_range(std::next(It), P.first->Scalars.end()), User);
5424+ } while (It != P.first->Scalars.end());
53785425 }
53795426 return all_of(PotentiallyReorderedEntriesCount,
53805427 [&](const std::pair<const TreeEntry *, unsigned> &P) {
@@ -5648,8 +5695,11 @@ class BoUpSLP {
56485695 const TreeEntry *ParentTE =
56495696 Bundle->getTreeEntry()->UserTreeIndex.UserTE;
56505697 Value *User = ParentTE->Scalars[Lane];
5651- if (!ParentsUniqueUsers.insert(User).second)
5652- break;
5698+ if (!ParentsUniqueUsers.insert(User).second) {
5699+ It = std::find(std::next(It),
5700+ Bundle->getTreeEntry()->Scalars.end(), In);
5701+ continue;
5702+ }
56535703 }
56545704
56555705 for (unsigned OpIdx :
@@ -10745,10 +10795,11 @@ class InstructionsCompatibilityAnalysis {
1074510795 /// Checks if the opcode is supported as the main opcode for copyable
1074610796 /// elements.
1074710797 static bool isSupportedOpcode(const unsigned Opcode) {
10748- return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
10749- Opcode == Instruction::Shl || Opcode == Instruction::SDiv ||
10750- Opcode == Instruction::UDiv || Opcode == Instruction::And ||
10751- Opcode == Instruction::Or || Opcode == Instruction::Xor;
10798+ return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
10799+ Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
10800+ Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
10801+ Opcode == Instruction::And || Opcode == Instruction::Or ||
10802+ Opcode == Instruction::Xor;
1075210803 }
1075310804
1075410805 /// Identifies the best candidate value, which represents main opcode
@@ -10808,8 +10859,12 @@ class InstructionsCompatibilityAnalysis {
1080810859 for (const auto &P : Candidates) {
1080910860 if (P.second.size() < BestOpcodeNum)
1081010861 continue;
10862+ // Skip groups with inner dependencies (an instruction is in Operands).
10863+ if (any_of(P.second,
10864+ [&](Instruction *I) { return Operands.contains(I); }))
10865+ continue;
1081110866 for (Instruction *I : P.second) {
10812- if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I) ) {
10867+ if (IsSupportedInstruction(I, AnyUndef)) {
1081310868 MainOp = I;
1081410869 BestOpcodeNum = P.second.size();
1081510870 break;
@@ -11069,6 +11124,7 @@ class InstructionsCompatibilityAnalysis {
1106911124 getWidenedType(S.getMainOp()->getType(), VL.size());
1107011125 switch (MainOpcode) {
1107111126 case Instruction::Add:
11127+ case Instruction::Sub:
1107211128 case Instruction::LShr:
1107311129 case Instruction::Shl:
1107411130 case Instruction::SDiv:
@@ -19686,8 +19742,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1968619742 V = ::propagateMetadata(I, E->Scalars);
1968719743 // Drop nuw flags for abs(sub(commutative), true).
1968819744 if (!MinBWs.contains(E) && ShuffleOrOp == Instruction::Sub &&
19689- any_of(E->Scalars, [](Value *V) {
19690- return isa<PoisonValue>(V) || isCommutative(cast<Instruction>(V));
19745+ any_of(E->Scalars, [E](Value *V) {
19746+ return isa<PoisonValue>(V) ||
19747+ (E->hasCopyableElements() && E->isCopyableElement(V)) ||
19748+ isCommutative(cast<Instruction>(V));
1969119749 }))
1969219750 I->setHasNoUnsignedWrap(/*b=*/false);
1969319751 }
@@ -20091,9 +20149,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
2009120149 // Drop nuw flags for abs(sub(commutative), true).
2009220150 if (auto *I = dyn_cast<Instruction>(Vec);
2009320151 I && Opcode == Instruction::Sub && !MinBWs.contains(E) &&
20094- any_of(E->Scalars, [](Value *V) {
20152+ any_of(E->Scalars, [E ](Value *V) {
2009520153 if (isa<PoisonValue>(V))
2009620154 return false;
20155+ if (E->hasCopyableElements() && E->isCopyableElement(V))
20156+ return false;
2009720157 auto *IV = cast<Instruction>(V);
2009820158 return IV->getOpcode() == Instruction::Sub && isCommutative(IV);
2009920159 }))
0 commit comments