Skip to content

Commit 8c41859

Browse files
committed
[SLP]Clear the operands deps of non-schedulable nodes, if previously all operands were copyable
If all operands of a non-schedulable node were previously only copyables, we need to clear the dependencies of the original schedule data for such copyable operands and recalculate them to correctly handle the number of dependencies. Fixes #159406
1 parent 3e0c58b commit 8c41859

File tree

2 files changed

+190
-4
lines changed

2 files changed

+190
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20804,12 +20804,45 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2080420804
const EdgeInfo &EI) {
2080520805
// No need to schedule PHIs, insertelement, extractelement and extractvalue
2080620806
// instructions.
20807-
bool HasCopyables = S.areInstructionsWithCopyableElements();
2080820807
if (isa<PHINode>(S.getMainOp()) ||
20809-
isVectorLikeInstWithConstOps(S.getMainOp()) ||
20810-
(!HasCopyables && doesNotNeedToSchedule(VL)) ||
20811-
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
20808+
isVectorLikeInstWithConstOps(S.getMainOp()))
20809+
return nullptr;
20810+
bool HasCopyables = S.areInstructionsWithCopyableElements();
20811+
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
20812+
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
20813+
// If all operands were replaced by copyables, the operands of this node
20814+
// might be not, so need to recalculate dependencies for schedule data,
20815+
// replaced by copyable schedule data.
20816+
SmallVector<ScheduleData *> ControlDependentMembers;
20817+
for (Value *V : VL) {
20818+
auto *I = dyn_cast<Instruction>(V);
20819+
if (!I || (HasCopyables && S.isCopyableElement(V)))
20820+
continue;
20821+
SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
20822+
for (const Use &U : I->operands()) {
20823+
unsigned &NumOps =
20824+
UserOpToNumOps.try_emplace(std::make_pair(I, U.get()), 0)
20825+
.first->getSecond();
20826+
++NumOps;
20827+
if (auto *Op = dyn_cast<Instruction>(U.get());
20828+
Op && areAllOperandsReplacedByCopyableData(I, Op, *SLP, NumOps)) {
20829+
if (ScheduleData *OpSD = getScheduleData(Op);
20830+
OpSD && OpSD->hasValidDependencies()) {
20831+
OpSD->clearDirectDependencies();
20832+
if (RegionHasStackSave ||
20833+
!isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
20834+
ControlDependentMembers.push_back(OpSD);
20835+
}
20836+
}
20837+
}
20838+
}
20839+
if (!ControlDependentMembers.empty()) {
20840+
ScheduleBundle Invalid = ScheduleBundle::invalid();
20841+
calculateDependencies(Invalid, /*InsertInReadyList=*/true, SLP,
20842+
ControlDependentMembers);
20843+
}
2081220844
return nullptr;
20845+
}
2081320846

2081420847
// Initialize the instruction bundle.
2081520848
Instruction *OldScheduleEnd = ScheduleEnd;
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-cros-linux-gnu < %s | FileCheck %s
3+
4+
%struct.fe = type { [5 x i64] }
5+
6+
define i32 @test(i64 %0, i128 %1, i1 %2) {
7+
; CHECK-LABEL: define i32 @test(
8+
; CHECK-SAME: i64 [[TMP0:%.*]], i128 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
9+
; CHECK-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_FE:%.*]], align 8
10+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
11+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 24
12+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 32
13+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 8
14+
; CHECK-NEXT: br label %[[BB9:.*]]
15+
; CHECK: [[BB9]]:
16+
; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ undef, [[TMP3:%.*]] ], [ [[TMP29:%.*]], %[[BB9]] ]
17+
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP26:%.*]], %[[BB9]] ]
18+
; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP23:%.*]], %[[BB9]] ]
19+
; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP20:%.*]], %[[BB9]] ]
20+
; CHECK-NEXT: [[TMP14:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP17:%.*]], %[[BB9]] ]
21+
; CHECK-NEXT: [[DOTSROA_14_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP52:%.*]], %[[BB9]] ]
22+
; CHECK-NEXT: [[DOTSROA_11_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP50:%.*]], %[[BB9]] ]
23+
; CHECK-NEXT: [[DOTSROA_8_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP57:%.*]], %[[BB9]] ]
24+
; CHECK-NEXT: [[DOTSROA_4_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP56:%.*]], %[[BB9]] ]
25+
; CHECK-NEXT: [[DOTSROA_0_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP54:%.*]], %[[BB9]] ]
26+
; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[DOTSROA_0_0]], [[TMP14]]
27+
; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], [[TMP0]]
28+
; CHECK-NEXT: [[TMP17]] = xor i64 [[TMP16]], 1
29+
; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP4]], align 8
30+
; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[DOTSROA_4_0]], [[TMP13]]
31+
; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], [[TMP0]]
32+
; CHECK-NEXT: [[TMP20]] = xor i64 [[TMP19]], 1
33+
; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP8]], align 8
34+
; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[DOTSROA_8_0]], [[TMP12]]
35+
; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], [[TMP0]]
36+
; CHECK-NEXT: [[TMP23]] = xor i64 [[TMP22]], 1
37+
; CHECK-NEXT: store i64 [[TMP23]], ptr [[TMP5]], align 8
38+
; CHECK-NEXT: [[TMP24:%.*]] = xor i64 [[DOTSROA_11_0]], [[TMP11]]
39+
; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], [[TMP0]]
40+
; CHECK-NEXT: [[TMP26]] = xor i64 [[TMP25]], 1
41+
; CHECK-NEXT: store i64 [[TMP26]], ptr [[TMP6]], align 8
42+
; CHECK-NEXT: [[TMP27:%.*]] = xor i64 [[DOTSROA_14_0]], [[TMP10]]
43+
; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], [[TMP0]]
44+
; CHECK-NEXT: [[TMP29]] = xor i64 [[TMP28]], 1
45+
; CHECK-NEXT: store i64 [[TMP29]], ptr [[TMP7]], align 8
46+
; CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr null, align 4294967296
47+
; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP19]], 1
48+
; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP16]], 1
49+
; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP17]], 1
50+
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP29]], 19
51+
; CHECK-NEXT: [[TMP35:%.*]] = zext i64 [[TMP34]] to i128
52+
; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[TMP26]], 19
53+
; CHECK-NEXT: [[TMP37:%.*]] = zext i64 [[TMP36]] to i128
54+
; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP23]], 19
55+
; CHECK-NEXT: [[TMP39:%.*]] = zext i64 [[TMP38]] to i128
56+
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw nsw i128 [[TMP39]], 24
57+
; CHECK-NEXT: [[TMP41:%.*]] = zext i64 [[TMP32]] to i128
58+
; CHECK-NEXT: [[TMP42:%.*]] = mul nuw i128 [[TMP37]], [[TMP41]]
59+
; CHECK-NEXT: [[TMP43:%.*]] = zext i64 [[TMP31]] to i128
60+
; CHECK-NEXT: [[TMP44:%.*]] = mul nuw i128 [[TMP35]], [[TMP43]]
61+
; CHECK-NEXT: [[TMP45:%.*]] = zext i64 [[TMP33]] to i128
62+
; CHECK-NEXT: [[TMP46:%.*]] = mul i128 [[TMP1]], [[TMP45]]
63+
; CHECK-NEXT: [[TMP47:%.*]] = add i128 [[TMP40]], [[TMP46]]
64+
; CHECK-NEXT: [[TMP48:%.*]] = add i128 [[TMP47]], [[TMP42]]
65+
; CHECK-NEXT: [[TMP49:%.*]] = add i128 [[TMP48]], [[TMP44]]
66+
; CHECK-NEXT: [[TMP50]] = and i64 [[TMP29]], 1
67+
; CHECK-NEXT: [[TMP51:%.*]] = trunc i128 [[TMP49]] to i64
68+
; CHECK-NEXT: [[TMP52]] = and i64 [[TMP30]], 1
69+
; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[TMP51]], 1
70+
; CHECK-NEXT: [[TMP54]] = and i64 [[TMP53]], 1
71+
; CHECK-NEXT: [[TMP55:%.*]] = lshr i64 [[TMP53]], 1
72+
; CHECK-NEXT: [[TMP56]] = and i64 [[TMP19]], 1
73+
; CHECK-NEXT: [[TMP57]] = or i64 [[TMP55]], 1
74+
; CHECK-NEXT: br i1 [[TMP2]], label %[[BB58:.*]], label %[[BB9]]
75+
; CHECK: [[BB58]]:
76+
; CHECK-NEXT: call void @g(ptr nonnull [[TMP4]])
77+
; CHECK-NEXT: ret i32 0
78+
;
79+
%4 = alloca %struct.fe, align 8
80+
%5 = getelementptr inbounds nuw i8, ptr %4, i64 16
81+
%6 = getelementptr inbounds nuw i8, ptr %4, i64 24
82+
%7 = getelementptr inbounds nuw i8, ptr %4, i64 32
83+
%8 = getelementptr inbounds nuw i8, ptr %4, i64 8
84+
br label %9
85+
86+
9:
87+
%10 = phi i64 [ undef, %3 ], [ %29, %9 ]
88+
%11 = phi i64 [ undef, %3 ], [ %26, %9 ]
89+
%12 = phi i64 [ undef, %3 ], [ %23, %9 ]
90+
%13 = phi i64 [ undef, %3 ], [ %20, %9 ]
91+
%14 = phi i64 [ undef, %3 ], [ %17, %9 ]
92+
%.sroa.14.0 = phi i64 [ undef, %3 ], [ %52, %9 ]
93+
%.sroa.11.0 = phi i64 [ undef, %3 ], [ %50, %9 ]
94+
%.sroa.8.0 = phi i64 [ undef, %3 ], [ %57, %9 ]
95+
%.sroa.4.0 = phi i64 [ undef, %3 ], [ %56, %9 ]
96+
%.sroa.0.0 = phi i64 [ undef, %3 ], [ %54, %9 ]
97+
%15 = xor i64 %.sroa.0.0, %14
98+
%16 = and i64 %15, %0
99+
%17 = xor i64 %16, 1
100+
store i64 %17, ptr %4, align 8
101+
%18 = xor i64 %.sroa.4.0, %13
102+
%19 = and i64 %18, %0
103+
%20 = xor i64 %19, 1
104+
store i64 %20, ptr %8, align 8
105+
%21 = xor i64 %.sroa.8.0, %12
106+
%22 = and i64 %21, %0
107+
%23 = xor i64 %22, 1
108+
store i64 %23, ptr %5, align 8
109+
%24 = xor i64 %.sroa.11.0, %11
110+
%25 = and i64 %24, %0
111+
%26 = xor i64 %25, 1
112+
store i64 %26, ptr %6, align 8
113+
%27 = xor i64 %.sroa.14.0, %10
114+
%28 = and i64 %27, %0
115+
%29 = xor i64 %28, 1
116+
store i64 %29, ptr %7, align 8
117+
%30 = load i64, ptr null, align 4294967296
118+
%31 = or i64 %19, 1
119+
%32 = or i64 %16, 1
120+
%33 = add i64 %17, 1
121+
%34 = mul i64 %29, 19
122+
%35 = zext i64 %34 to i128
123+
%36 = mul i64 %26, 19
124+
%37 = zext i64 %36 to i128
125+
%38 = mul i64 %23, 19
126+
%39 = zext i64 %38 to i128
127+
%40 = mul nuw nsw i128 %39, 24
128+
%41 = zext i64 %32 to i128
129+
%42 = mul nuw i128 %37, %41
130+
%43 = zext i64 %31 to i128
131+
%44 = mul nuw i128 %35, %43
132+
%45 = zext i64 %33 to i128
133+
%46 = mul i128 %1, %45
134+
%47 = add i128 %40, %46
135+
%48 = add i128 %47, %42
136+
%49 = add i128 %48, %44
137+
%50 = and i64 %29, 1
138+
%51 = trunc i128 %49 to i64
139+
%52 = and i64 %30, 1
140+
%53 = add i64 %51, 1
141+
%54 = and i64 %53, 1
142+
%55 = lshr i64 %53, 1
143+
%56 = and i64 %19, 1
144+
%57 = or i64 %55, 1
145+
br i1 %2, label %58, label %9
146+
147+
58:
148+
call void @g(ptr nonnull %4)
149+
ret i32 0
150+
}
151+
152+
declare void @g(ptr)
153+

0 commit comments

Comments
 (0)