Skip to content

Commit 758c685

Browse files
committed
[SLP]Do not include copyable data to the same user twice
If the copyable schedule data is created and the user is used several times in the user node, there is no need to count the same data for the same user several times; it needs to be included only once. Fixes #153754
1 parent dcdbd5b commit 758c685

File tree

2 files changed

+69
-0
lines changed

2 files changed

+69
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5336,6 +5336,7 @@ class BoUpSLP {
53365336
ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
53375337
const auto *It = find(Op, I);
53385338
assert(It != Op.end() && "Lane not set");
5339+
SmallPtrSet<Instruction *, 4> Visited;
53395340
do {
53405341
int Lane = std::distance(Op.begin(), It);
53415342
assert(Lane >= 0 && "Lane not set");
@@ -5345,6 +5346,10 @@ class BoUpSLP {
53455346
assert(Lane < static_cast<int>(EI.UserTE->Scalars.size()) &&
53465347
"Couldn't find extract lane");
53475348
auto *In = cast<Instruction>(EI.UserTE->Scalars[Lane]);
5349+
if (!Visited.insert(In).second) {
5350+
It = find(make_range(std::next(It), Op.end()), I);
5351+
continue;
5352+
}
53485353
ScheduleCopyableDataMapByInstUser
53495354
.try_emplace(std::make_pair(std::make_pair(In, EI.EdgeIdx), I))
53505355
.first->getSecond()
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test(ptr %o, i32 %b.021.i) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: ptr [[O:%.*]], i32 [[B_021_I:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[O1:%.*]] = alloca [3 x i32], align 4
9+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
10+
; CHECK: [[WHILE_BODY]]:
11+
; CHECK-NEXT: [[SUB623:%.*]] = phi i32 [ [[SUB6:%.*]], %[[N_EXIT:.*]] ], [ 0, %[[ENTRY]] ]
12+
; CHECK-NEXT: [[ADD21:%.*]] = phi i32 [ [[ADD:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ]
13+
; CHECK-NEXT: [[ADD419:%.*]] = phi i32 [ [[ADD4:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ]
14+
; CHECK-NEXT: [[ADD18:%.*]] = phi i32 [ [[ADD]], %[[N_EXIT]] ], [ 1, %[[ENTRY]] ]
15+
; CHECK-NEXT: store i32 [[ADD419]], ptr [[O1]], align 4
16+
; CHECK-NEXT: store i32 [[ADD18]], ptr [[O]], align 4
17+
; CHECK-NEXT: br label %[[FOR_BODY4_I:.*]]
18+
; CHECK: [[FOR_COND1_I:.*]]:
19+
; CHECK-NEXT: ret i32 0
20+
; CHECK: [[FOR_BODY4_I]]:
21+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[O1]], align 4
22+
; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0
23+
; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[FOR_COND1_I]], label %[[N_EXIT]]
24+
; CHECK: [[N_EXIT]]:
25+
; CHECK-NEXT: [[SUB:%.*]] = or i32 [[B_021_I]], [[ADD21]]
26+
; CHECK-NEXT: [[ADD]] = or i32 [[SUB]], 1
27+
; CHECK-NEXT: [[ADD2:%.*]] = or i32 [[B_021_I]], 1
28+
; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[SUB623]]
29+
; CHECK-NEXT: [[ADD4]] = or i32 [[ADD3]], 1
30+
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[B_021_I]], 1
31+
; CHECK-NEXT: [[SUB6]] = or i32 [[MUL]], 1
32+
; CHECK-NEXT: br label %[[WHILE_BODY]]
33+
;
34+
entry:
35+
%o1 = alloca [3 x i32], align 4
36+
br label %while.body
37+
38+
while.body: ; preds = %n.exit, %entry
39+
%sub623 = phi i32 [ %sub6, %n.exit ], [ 0, %entry ]
40+
%add21 = phi i32 [ %add, %n.exit ], [ 0, %entry ]
41+
%add419 = phi i32 [ %add4, %n.exit ], [ 0, %entry ]
42+
%add18 = phi i32 [ %add, %n.exit ], [ 1, %entry ]
43+
store i32 %add419, ptr %o1, align 4
44+
store i32 %add18, ptr %o, align 4
45+
br label %for.body4.i
46+
47+
for.cond1.i: ; preds = %for.body4.i
48+
ret i32 0
49+
50+
for.body4.i: ; preds = %while.body
51+
%0 = load i32, ptr %o1, align 4
52+
%tobool.not.i = icmp eq i32 %0, 0
53+
br i1 %tobool.not.i, label %for.cond1.i, label %n.exit
54+
55+
n.exit: ; preds = %for.body4.i
56+
%sub = or i32 %b.021.i, %add21
57+
%add = or i32 %sub, 1
58+
%add2 = or i32 %b.021.i, 1
59+
%add3 = add i32 %add2, %sub623
60+
%add4 = or i32 %add3, 1
61+
%mul = shl i32 %b.021.i, 1
62+
%sub6 = or i32 %mul, 1
63+
br label %while.body
64+
}

0 commit comments

Comments
 (0)