Skip to content

Commit 8c41859

Browse files
committed
[SLP]Clear the operands deps of non-schedulable nodes, if previously all operands were copyable
If all operands of the non-schedulable nodes were previously only copyables, we need to clear the dependencies of the original schedule data for such copyable operands and recalculate them to correctly handle the number of dependencies. Fixes llvm#159406
1 parent 3e0c58b commit 8c41859

File tree

2 files changed

+190
-4
lines changed

2 files changed

+190
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20804,12 +20804,45 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2080420804
const EdgeInfo &EI) {
2080520805
// No need to schedule PHIs, insertelement, extractelement and extractvalue
2080620806
// instructions.
20807-
bool HasCopyables = S.areInstructionsWithCopyableElements();
2080820807
if (isa<PHINode>(S.getMainOp()) ||
20809-
isVectorLikeInstWithConstOps(S.getMainOp()) ||
20810-
(!HasCopyables && doesNotNeedToSchedule(VL)) ||
20811-
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
20808+
isVectorLikeInstWithConstOps(S.getMainOp()))
20809+
return nullptr;
20810+
bool HasCopyables = S.areInstructionsWithCopyableElements();
20811+
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
20812+
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
20813+
// If all operands were replaced by copyables, the operands of this node
20814+
// might be not, so need to recalculate dependencies for schedule data,
20815+
// replaced by copyable schedule data.
20816+
SmallVector<ScheduleData *> ControlDependentMembers;
20817+
for (Value *V : VL) {
20818+
auto *I = dyn_cast<Instruction>(V);
20819+
if (!I || (HasCopyables && S.isCopyableElement(V)))
20820+
continue;
20821+
SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
20822+
for (const Use &U : I->operands()) {
20823+
unsigned &NumOps =
20824+
UserOpToNumOps.try_emplace(std::make_pair(I, U.get()), 0)
20825+
.first->getSecond();
20826+
++NumOps;
20827+
if (auto *Op = dyn_cast<Instruction>(U.get());
20828+
Op && areAllOperandsReplacedByCopyableData(I, Op, *SLP, NumOps)) {
20829+
if (ScheduleData *OpSD = getScheduleData(Op);
20830+
OpSD && OpSD->hasValidDependencies()) {
20831+
OpSD->clearDirectDependencies();
20832+
if (RegionHasStackSave ||
20833+
!isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
20834+
ControlDependentMembers.push_back(OpSD);
20835+
}
20836+
}
20837+
}
20838+
}
20839+
if (!ControlDependentMembers.empty()) {
20840+
ScheduleBundle Invalid = ScheduleBundle::invalid();
20841+
calculateDependencies(Invalid, /*InsertInReadyList=*/true, SLP,
20842+
ControlDependentMembers);
20843+
}
2081220844
return nullptr;
20845+
}
2081320846

2081420847
// Initialize the instruction bundle.
2081520848
Instruction *OldScheduleEnd = ScheduleEnd;
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-cros-linux-gnu < %s | FileCheck %s
3+
4+
%struct.fe = type { [5 x i64] }
5+
6+
define i32 @test(i64 %0, i128 %1, i1 %2) {
7+
; CHECK-LABEL: define i32 @test(
8+
; CHECK-SAME: i64 [[TMP0:%.*]], i128 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
9+
; CHECK-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_FE:%.*]], align 8
10+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
11+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 24
12+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 32
13+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 8
14+
; CHECK-NEXT: br label %[[BB9:.*]]
15+
; CHECK: [[BB9]]:
16+
; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ undef, [[TMP3:%.*]] ], [ [[TMP29:%.*]], %[[BB9]] ]
17+
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP26:%.*]], %[[BB9]] ]
18+
; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP23:%.*]], %[[BB9]] ]
19+
; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP20:%.*]], %[[BB9]] ]
20+
; CHECK-NEXT: [[TMP14:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP17:%.*]], %[[BB9]] ]
21+
; CHECK-NEXT: [[DOTSROA_14_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP52:%.*]], %[[BB9]] ]
22+
; CHECK-NEXT: [[DOTSROA_11_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP50:%.*]], %[[BB9]] ]
23+
; CHECK-NEXT: [[DOTSROA_8_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP57:%.*]], %[[BB9]] ]
24+
; CHECK-NEXT: [[DOTSROA_4_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP56:%.*]], %[[BB9]] ]
25+
; CHECK-NEXT: [[DOTSROA_0_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP54:%.*]], %[[BB9]] ]
26+
; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[DOTSROA_0_0]], [[TMP14]]
27+
; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], [[TMP0]]
28+
; CHECK-NEXT: [[TMP17]] = xor i64 [[TMP16]], 1
29+
; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP4]], align 8
30+
; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[DOTSROA_4_0]], [[TMP13]]
31+
; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], [[TMP0]]
32+
; CHECK-NEXT: [[TMP20]] = xor i64 [[TMP19]], 1
33+
; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP8]], align 8
34+
; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[DOTSROA_8_0]], [[TMP12]]
35+
; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], [[TMP0]]
36+
; CHECK-NEXT: [[TMP23]] = xor i64 [[TMP22]], 1
37+
; CHECK-NEXT: store i64 [[TMP23]], ptr [[TMP5]], align 8
38+
; CHECK-NEXT: [[TMP24:%.*]] = xor i64 [[DOTSROA_11_0]], [[TMP11]]
39+
; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], [[TMP0]]
40+
; CHECK-NEXT: [[TMP26]] = xor i64 [[TMP25]], 1
41+
; CHECK-NEXT: store i64 [[TMP26]], ptr [[TMP6]], align 8
42+
; CHECK-NEXT: [[TMP27:%.*]] = xor i64 [[DOTSROA_14_0]], [[TMP10]]
43+
; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], [[TMP0]]
44+
; CHECK-NEXT: [[TMP29]] = xor i64 [[TMP28]], 1
45+
; CHECK-NEXT: store i64 [[TMP29]], ptr [[TMP7]], align 8
46+
; CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr null, align 4294967296
47+
; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP19]], 1
48+
; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP16]], 1
49+
; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP17]], 1
50+
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP29]], 19
51+
; CHECK-NEXT: [[TMP35:%.*]] = zext i64 [[TMP34]] to i128
52+
; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[TMP26]], 19
53+
; CHECK-NEXT: [[TMP37:%.*]] = zext i64 [[TMP36]] to i128
54+
; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP23]], 19
55+
; CHECK-NEXT: [[TMP39:%.*]] = zext i64 [[TMP38]] to i128
56+
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw nsw i128 [[TMP39]], 24
57+
; CHECK-NEXT: [[TMP41:%.*]] = zext i64 [[TMP32]] to i128
58+
; CHECK-NEXT: [[TMP42:%.*]] = mul nuw i128 [[TMP37]], [[TMP41]]
59+
; CHECK-NEXT: [[TMP43:%.*]] = zext i64 [[TMP31]] to i128
60+
; CHECK-NEXT: [[TMP44:%.*]] = mul nuw i128 [[TMP35]], [[TMP43]]
61+
; CHECK-NEXT: [[TMP45:%.*]] = zext i64 [[TMP33]] to i128
62+
; CHECK-NEXT: [[TMP46:%.*]] = mul i128 [[TMP1]], [[TMP45]]
63+
; CHECK-NEXT: [[TMP47:%.*]] = add i128 [[TMP40]], [[TMP46]]
64+
; CHECK-NEXT: [[TMP48:%.*]] = add i128 [[TMP47]], [[TMP42]]
65+
; CHECK-NEXT: [[TMP49:%.*]] = add i128 [[TMP48]], [[TMP44]]
66+
; CHECK-NEXT: [[TMP50]] = and i64 [[TMP29]], 1
67+
; CHECK-NEXT: [[TMP51:%.*]] = trunc i128 [[TMP49]] to i64
68+
; CHECK-NEXT: [[TMP52]] = and i64 [[TMP30]], 1
69+
; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[TMP51]], 1
70+
; CHECK-NEXT: [[TMP54]] = and i64 [[TMP53]], 1
71+
; CHECK-NEXT: [[TMP55:%.*]] = lshr i64 [[TMP53]], 1
72+
; CHECK-NEXT: [[TMP56]] = and i64 [[TMP19]], 1
73+
; CHECK-NEXT: [[TMP57]] = or i64 [[TMP55]], 1
74+
; CHECK-NEXT: br i1 [[TMP2]], label %[[BB58:.*]], label %[[BB9]]
75+
; CHECK: [[BB58]]:
76+
; CHECK-NEXT: call void @g(ptr nonnull [[TMP4]])
77+
; CHECK-NEXT: ret i32 0
78+
;
79+
%4 = alloca %struct.fe, align 8
80+
%5 = getelementptr inbounds nuw i8, ptr %4, i64 16
81+
%6 = getelementptr inbounds nuw i8, ptr %4, i64 24
82+
%7 = getelementptr inbounds nuw i8, ptr %4, i64 32
83+
%8 = getelementptr inbounds nuw i8, ptr %4, i64 8
84+
br label %9
85+
86+
9:
87+
%10 = phi i64 [ undef, %3 ], [ %29, %9 ]
88+
%11 = phi i64 [ undef, %3 ], [ %26, %9 ]
89+
%12 = phi i64 [ undef, %3 ], [ %23, %9 ]
90+
%13 = phi i64 [ undef, %3 ], [ %20, %9 ]
91+
%14 = phi i64 [ undef, %3 ], [ %17, %9 ]
92+
%.sroa.14.0 = phi i64 [ undef, %3 ], [ %52, %9 ]
93+
%.sroa.11.0 = phi i64 [ undef, %3 ], [ %50, %9 ]
94+
%.sroa.8.0 = phi i64 [ undef, %3 ], [ %57, %9 ]
95+
%.sroa.4.0 = phi i64 [ undef, %3 ], [ %56, %9 ]
96+
%.sroa.0.0 = phi i64 [ undef, %3 ], [ %54, %9 ]
97+
%15 = xor i64 %.sroa.0.0, %14
98+
%16 = and i64 %15, %0
99+
%17 = xor i64 %16, 1
100+
store i64 %17, ptr %4, align 8
101+
%18 = xor i64 %.sroa.4.0, %13
102+
%19 = and i64 %18, %0
103+
%20 = xor i64 %19, 1
104+
store i64 %20, ptr %8, align 8
105+
%21 = xor i64 %.sroa.8.0, %12
106+
%22 = and i64 %21, %0
107+
%23 = xor i64 %22, 1
108+
store i64 %23, ptr %5, align 8
109+
%24 = xor i64 %.sroa.11.0, %11
110+
%25 = and i64 %24, %0
111+
%26 = xor i64 %25, 1
112+
store i64 %26, ptr %6, align 8
113+
%27 = xor i64 %.sroa.14.0, %10
114+
%28 = and i64 %27, %0
115+
%29 = xor i64 %28, 1
116+
store i64 %29, ptr %7, align 8
117+
%30 = load i64, ptr null, align 4294967296
118+
%31 = or i64 %19, 1
119+
%32 = or i64 %16, 1
120+
%33 = add i64 %17, 1
121+
%34 = mul i64 %29, 19
122+
%35 = zext i64 %34 to i128
123+
%36 = mul i64 %26, 19
124+
%37 = zext i64 %36 to i128
125+
%38 = mul i64 %23, 19
126+
%39 = zext i64 %38 to i128
127+
%40 = mul nuw nsw i128 %39, 24
128+
%41 = zext i64 %32 to i128
129+
%42 = mul nuw i128 %37, %41
130+
%43 = zext i64 %31 to i128
131+
%44 = mul nuw i128 %35, %43
132+
%45 = zext i64 %33 to i128
133+
%46 = mul i128 %1, %45
134+
%47 = add i128 %40, %46
135+
%48 = add i128 %47, %42
136+
%49 = add i128 %48, %44
137+
%50 = and i64 %29, 1
138+
%51 = trunc i128 %49 to i64
139+
%52 = and i64 %30, 1
140+
%53 = add i64 %51, 1
141+
%54 = and i64 %53, 1
142+
%55 = lshr i64 %53, 1
143+
%56 = and i64 %19, 1
144+
%57 = or i64 %55, 1
145+
br i1 %2, label %58, label %9
146+
147+
58:
148+
call void @g(ptr nonnull %4)
149+
ret i32 0
150+
}
151+
152+
declare void @g(ptr)
153+

0 commit comments

Comments
 (0)