Skip to content

Commit dd5ba69

Browse files
committed
[SLP]Recalculate deps for potential control-dependent schedule data
After clearing the dependencies in copyable data, the dependencies of the original ScheduleData must be recalculated if it can be marked as control dependent. Fixes #153289
1 parent 0f6d3ad commit dd5ba69

File tree

2 files changed

+78
-9
lines changed

2 files changed

+78
-9
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5689,7 +5689,8 @@ class BoUpSLP {
56895689
/// Updates the dependency information of a bundle and of all instructions/
56905690
/// bundles which depend on the original bundle.
56915691
void calculateDependencies(ScheduleBundle &Bundle, bool InsertInReadyList,
5692-
BoUpSLP *SLP);
5692+
BoUpSLP *SLP,
5693+
ArrayRef<ScheduleData *> ControlDeps = {});
56935694

56945695
/// Sets all instruction in the scheduling region to un-scheduled.
56955696
void resetSchedule();
@@ -20727,15 +20728,21 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2072720728
LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getMainOp() << "\n");
2072820729

2072920730
auto TryScheduleBundleImpl = [=](bool ReSchedule, ScheduleBundle &Bundle) {
20730-
// Clear deps or reculate the region, if the memory instruction is a
20731-
// copyable. It may have memory deps, which must be reaculated.
20731+
// Clear deps or recalculate the region, if the memory instruction is a
20732+
// copyable. It may have memory deps, which must be recalculated.
20733+
SmallVector<ScheduleData *> ControlDependentMembers;
2073220734
auto CheckIfNeedToClearDeps = [&](ScheduleBundle &Bundle) {
2073320735
SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
2073420736
for (ScheduleEntity *SE : Bundle.getBundle()) {
2073520737
if (ScheduleCopyableData *SD = dyn_cast<ScheduleCopyableData>(SE)) {
2073620738
if (ScheduleData *BundleMember = getScheduleData(SD->getInst());
20737-
BundleMember && BundleMember->hasValidDependencies())
20739+
BundleMember && BundleMember->hasValidDependencies()) {
2073820740
BundleMember->clearDirectDependencies();
20741+
if (RegionHasStackSave ||
20742+
!isGuaranteedToTransferExecutionToSuccessor(
20743+
BundleMember->getInst()))
20744+
ControlDependentMembers.push_back(BundleMember);
20745+
}
2073920746
continue;
2074020747
}
2074120748
auto *SD = cast<ScheduleData>(SE);
@@ -20748,8 +20755,12 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2074820755
if (auto *Op = dyn_cast<Instruction>(U.get());
2074920756
Op && areAllOperandsReplacedByCopyableData(SD->getInst(), Op,
2075020757
*SLP, NumOps)) {
20751-
if (ScheduleData *OpSD = getScheduleData(Op))
20758+
if (ScheduleData *OpSD = getScheduleData(Op)) {
2075220759
OpSD->clearDirectDependencies();
20760+
if (RegionHasStackSave ||
20761+
!isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
20762+
ControlDependentMembers.push_back(OpSD);
20763+
}
2075320764
}
2075420765
}
2075520766
}
@@ -20783,7 +20794,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2078320794
CheckIfNeedToClearDeps(Bundle);
2078420795
LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << Bundle << " in block "
2078520796
<< BB->getName() << "\n");
20786-
calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP);
20797+
calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP,
20798+
ControlDependentMembers);
2078720799
}
2078820800

2078920801
if (ReSchedule) {
@@ -21048,9 +21060,9 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
2104821060
}
2104921061
}
2105021062

21051-
void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
21052-
bool InsertInReadyList,
21053-
BoUpSLP *SLP) {
21063+
void BoUpSLP::BlockScheduling::calculateDependencies(
21064+
ScheduleBundle &Bundle, bool InsertInReadyList, BoUpSLP *SLP,
21065+
ArrayRef<ScheduleData *> ControlDeps) {
2105421066
SmallVector<ScheduleEntity *> WorkList;
2105521067
auto ProcessNode = [&](ScheduleEntity *SE) {
2105621068
if (auto *CD = dyn_cast<ScheduleCopyableData>(SE)) {
@@ -21293,6 +21305,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
2129321305
};
2129421306

2129521307
WorkList.push_back(Bundle.getBundle().front());
21308+
WorkList.append(ControlDeps.begin(), ControlDeps.end());
2129621309
SmallPtrSet<ScheduleBundle *, 16> Visited;
2129721310
while (!WorkList.empty()) {
2129821311
ScheduleEntity *SD = WorkList.pop_back_val();
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test(i32 %0, i32 %1) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[K:%.*]] = alloca [4 x i32], align 16
9+
; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP0]], [[TMP1]]
10+
; CHECK-NEXT: [[SUB2:%.*]] = add i32 [[ADD1]], -1
11+
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr null, i32 [[ADD1]])
12+
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], -1
13+
; CHECK-NEXT: [[SUB3:%.*]] = add i32 [[ADD2]], [[CALL]]
14+
; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[SUB3]], [[TMP0]]
15+
; CHECK-NEXT: store i32 [[ADD4]], ptr [[K]], align 16
16+
; CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr i8, ptr [[K]], i64 4
17+
; CHECK-NEXT: store i32 0, ptr [[ARRAYINIT_ELEMENT]], align 4
18+
; CHECK-NEXT: [[ARRAYINIT_ELEMENT5:%.*]] = getelementptr i8, ptr [[K]], i64 8
19+
; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[ADD2]], [[SUB2]]
20+
; CHECK-NEXT: [[SUB8:%.*]] = add i32 [[ADD7]], [[TMP0]]
21+
; CHECK-NEXT: store i32 [[SUB8]], ptr [[ARRAYINIT_ELEMENT5]], align 8
22+
; CHECK-NEXT: [[ARRAYINIT_ELEMENT9:%.*]] = getelementptr i8, ptr [[K]], i64 12
23+
; CHECK-NEXT: [[ADD13:%.*]] = add i32 [[TMP1]], 1
24+
; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[ADD13]], [[TMP0]]
25+
; CHECK-NEXT: [[ADD11:%.*]] = add i32 [[ADD10]], [[ADD1]]
26+
; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[ADD11]], [[TMP0]]
27+
; CHECK-NEXT: store i32 [[ADD12]], ptr [[ARRAYINIT_ELEMENT9]], align 4
28+
; CHECK-NEXT: [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr null, ptr [[K]])
29+
; CHECK-NEXT: ret i32 [[CALL15]]
30+
;
31+
entry:
32+
%k = alloca [4 x i32], align 16
33+
%add1 = add i32 %0, %1
34+
%sub2 = add i32 %add1, -1
35+
%call = tail call i32 (ptr, ...) @printf(ptr null, i32 %add1)
36+
%add2 = add i32 %1, -1
37+
%sub3 = add i32 %add2, %call
38+
%add4 = add i32 %sub3, %0
39+
store i32 %add4, ptr %k, align 16
40+
%arrayinit.element = getelementptr i8, ptr %k, i64 4
41+
store i32 0, ptr %arrayinit.element, align 4
42+
%arrayinit.element5 = getelementptr i8, ptr %k, i64 8
43+
%add7 = add i32 %add2, %sub2
44+
%sub8 = add i32 %add7, %0
45+
store i32 %sub8, ptr %arrayinit.element5, align 8
46+
%arrayinit.element9 = getelementptr i8, ptr %k, i64 12
47+
%add13 = add i32 %1, 1
48+
%add10 = add i32 %add13, %0
49+
%add11 = add i32 %add10, %add1
50+
%add12 = add i32 %add11, %0
51+
store i32 %add12, ptr %arrayinit.element9, align 4
52+
%call15 = call i32 (ptr, ...) @printf(ptr null, ptr %k)
53+
ret i32 %call15
54+
}
55+
56+
declare i32 @printf(ptr, ...)

0 commit comments

Comments
 (0)