Skip to content

Commit e7f370f

Browse files
committed
[SLP] Check all copyable children for non-schedulable parent nodes
If the parent node is non-schedulable and includes several copies of the same instruction, its operands might be replaced by copyable nodes in multiple child nodes, and, if the instruction is commutative, they can be used in different operands. The compiler must account for this possibility, taking into account that non-copyable children are scheduled only once for the same parent instruction. Fixes #164242
1 parent 32b534b commit e7f370f

File tree

2 files changed

+82
-32
lines changed

2 files changed

+82
-32
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5578,14 +5578,16 @@ class BoUpSLP {
55785578
// Decrement the unscheduled counter and insert to ready list if
55795579
// ready.
55805580
auto DecrUnschedForInst = [&](Instruction *I, TreeEntry *UserTE,
5581-
unsigned OpIdx) {
5581+
unsigned OpIdx, bool FirstRun = false) {
55825582
if (!ScheduleCopyableDataMap.empty()) {
55835583
const EdgeInfo EI = {UserTE, OpIdx};
55845584
if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) {
55855585
DecrUnsched(CD, /*IsControl=*/false);
55865586
return;
55875587
}
55885588
}
5589+
if (!FirstRun)
5590+
return;
55895591
auto It = OperandsUses.find(I);
55905592
assert(It != OperandsUses.end() && "Operand not found");
55915593
if (It->second > 0) {
@@ -5602,37 +5604,48 @@ class BoUpSLP {
56025604
break;
56035605
// Need to search for the lane since the tree entry can be
56045606
// reordered.
5605-
int Lane = std::distance(Bundle->getTreeEntry()->Scalars.begin(),
5606-
find(Bundle->getTreeEntry()->Scalars, In));
5607-
assert(Lane >= 0 && "Lane not set");
5608-
if (isa<StoreInst>(In) &&
5609-
!Bundle->getTreeEntry()->ReorderIndices.empty())
5610-
Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
5611-
assert(Lane < static_cast<int>(
5612-
Bundle->getTreeEntry()->Scalars.size()) &&
5613-
"Couldn't find extract lane");
5614-
5615-
// Since vectorization tree is being built recursively this
5616-
// assertion ensures that the tree entry has all operands set before
5617-
// reaching this code. Couple of exceptions known at the moment are
5618-
// extracts where their second (immediate) operand is not added.
5619-
// Since immediates do not affect scheduler behavior this is
5620-
// considered okay.
5621-
assert(In &&
5622-
(isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
5623-
In->getNumOperands() ==
5624-
Bundle->getTreeEntry()->getNumOperands() ||
5625-
Bundle->getTreeEntry()->isCopyableElement(In)) &&
5626-
"Missed TreeEntry operands?");
5627-
5628-
for (unsigned OpIdx :
5629-
seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
5630-
if (auto *I = dyn_cast<Instruction>(
5631-
Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
5632-
LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): " << *I
5633-
<< "\n");
5634-
DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx);
5635-
}
5607+
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
5608+
bool FirstRun = true;
5609+
do {
5610+
int Lane =
5611+
std::distance(Bundle->getTreeEntry()->Scalars.begin(), It);
5612+
assert(Lane >= 0 && "Lane not set");
5613+
if (isa<StoreInst>(In) &&
5614+
!Bundle->getTreeEntry()->ReorderIndices.empty())
5615+
Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
5616+
assert(Lane < static_cast<int>(
5617+
Bundle->getTreeEntry()->Scalars.size()) &&
5618+
"Couldn't find extract lane");
5619+
5620+
// Since vectorization tree is being built recursively this
5621+
// assertion ensures that the tree entry has all operands set
5622+
// before reaching this code. Couple of exceptions known at the
5623+
// moment are extracts where their second (immediate) operand is
5624+
// not added. Since immediates do not affect scheduler behavior
5625+
// this is considered okay.
5626+
assert(In &&
5627+
(isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
5628+
In->getNumOperands() ==
5629+
Bundle->getTreeEntry()->getNumOperands() ||
5630+
Bundle->getTreeEntry()->isCopyableElement(In)) &&
5631+
"Missed TreeEntry operands?");
5632+
5633+
for (unsigned OpIdx :
5634+
seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
5635+
if (auto *I = dyn_cast<Instruction>(
5636+
Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
5637+
LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): "
5638+
<< *I << "\n");
5639+
DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx,
5640+
FirstRun);
5641+
}
5642+
// If the parent node is schedulable, it will be handled correctly.
5643+
if (!Bundle->getTreeEntry()->doesNotNeedToSchedule())
5644+
break;
5645+
It = std::find(std::next(It),
5646+
Bundle->getTreeEntry()->Scalars.end(), In);
5647+
FirstRun = false;
5648+
} while (It != Bundle->getTreeEntry()->Scalars.end());
56365649
}
56375650
} else {
56385651
// If BundleMember is a stand-alone instruction, no operand reordering
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB6:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: br label %[[BB6]]
10+
; CHECK: [[BB6]]:
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ <i32 0, i32 0, i32 poison, i32 0>, %[[BB]] ], [ <i32 0, i32 0, i32 -1, i32 -1>, %[[BB1]] ]
12+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
13+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 0
14+
; CHECK-NEXT: ret void
15+
;
16+
bb:
17+
br i1 false, label %bb1, label %bb6
18+
19+
bb1:
20+
%add = add i32 0, 0
21+
%shl = shl i32 %add, 0
22+
%sub = sub i32 0, 1
23+
%add2 = add i32 %sub, %shl
24+
%add3 = add i32 0, 0
25+
%shl4 = shl i32 %add3, 0
26+
%ashr = ashr i32 %shl4, 1
27+
%add5 = add i32 0, 0
28+
br label %bb6
29+
30+
bb6:
31+
%phi = phi i32 [ poison, %bb ], [ %add2, %bb1 ]
32+
%phi7 = phi i32 [ 0, %bb ], [ %ashr, %bb1 ]
33+
%phi8 = phi i32 [ 0, %bb ], [ %add2, %bb1 ]
34+
%phi9 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
35+
%or = or i32 %phi8, 0
36+
ret void
37+
}

0 commit comments

Comments
 (0)