Skip to content

Commit 0fdfad3

Browse files
committed
[SLP]Fix insert point for copyable node with the last inst, used only outside the block
If the copyable entry has the last instruction, used only outside the block, the insertion point for the vector code should be the last instruction itself, not the following one. It prevents wrong def-use sequences, which might be generated for the buildvector nodes. Fixes #163404
1 parent 936e038 commit 0fdfad3

File tree

2 files changed

+92
-1
lines changed

2 files changed

+92
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17632,7 +17632,9 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
1763217632
}
1763317633
if (IsPHI ||
1763417634
(!E->isGather() && E->State != TreeEntry::SplitVectorize &&
17635-
E->doesNotNeedToSchedule()) ||
17635+
(E->doesNotNeedToSchedule() ||
17636+
(E->hasCopyableElements() && !E->isCopyableElement(LastInst) &&
17637+
isUsedOutsideBlock(LastInst)))) ||
1763617638
(GatheredLoadsEntriesFirst.has_value() &&
1763717639
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
1763817640
E->getOpcode() == Instruction::Load)) {
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br label %[[BB1:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP7:%.*]], %[[BB16:.*]] ], [ zeroinitializer, %[[BB1]] ]
10+
; CHECK-NEXT: br i1 false, label %[[BB1]], label %[[BB5:.*]]
11+
; CHECK: [[BB5]]:
12+
; CHECK-NEXT: [[PHI8:%.*]] = phi double [ 0.000000e+00, %[[BB16]] ], [ 0.000000e+00, %[[BB1]] ]
13+
; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], %[[BB16]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB1]] ]
14+
; CHECK-NEXT: switch i32 0, label %[[BB21:.*]] [
15+
; CHECK-NEXT: i32 4, label %[[BB21]]
16+
; CHECK-NEXT: i32 1, label %[[BB21]]
17+
; CHECK-NEXT: i32 0, label %[[BB9:.*]]
18+
; CHECK-NEXT: ]
19+
; CHECK: [[BB9]]:
20+
; CHECK-NEXT: [[PHI13:%.*]] = phi double [ 0.000000e+00, %[[BB21]] ], [ 0.000000e+00, %[[BB5]] ]
21+
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ [[TMP1]], %[[BB21]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB5]] ]
22+
; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ [[TMP9:%.*]], %[[BB21]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB5]] ]
23+
; CHECK-NEXT: switch i32 0, label %[[BB15:.*]] [
24+
; CHECK-NEXT: i32 1, label %[[BB14:.*]]
25+
; CHECK-NEXT: i32 0, label %[[BB16]]
26+
; CHECK-NEXT: ]
27+
; CHECK: [[BB14]]:
28+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
29+
; CHECK-NEXT: br label %[[BB16]]
30+
; CHECK: [[BB15]]:
31+
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, [[TMP2]]
32+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP5]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
33+
; CHECK-NEXT: br label %[[BB16]]
34+
; CHECK: [[BB16]]:
35+
; CHECK-NEXT: [[PHI20:%.*]] = phi double [ 0.000000e+00, %[[BB15]] ], [ 0.000000e+00, %[[BB14]] ], [ 0.000000e+00, %[[BB9]] ]
36+
; CHECK-NEXT: [[TMP7]] = phi <4 x i32> [ [[TMP5]], %[[BB15]] ], [ [[TMP4]], %[[BB14]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB9]] ]
37+
; CHECK-NEXT: [[TMP8]] = phi <4 x i32> [ [[TMP6]], %[[BB15]] ], [ [[TMP3]], %[[BB14]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB9]] ]
38+
; CHECK-NEXT: br i1 false, label %[[BB5]], label %[[BB1]]
39+
; CHECK: [[BB21]]:
40+
; CHECK-NEXT: [[TMP9]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
41+
; CHECK-NEXT: br label %[[BB9]]
42+
;
43+
bb:
44+
br label %bb1
45+
46+
bb1:
47+
%phi = phi i32 [ 0, %bb ], [ 0, %bb1 ], [ %phi17, %bb16 ]
48+
%phi2 = phi i32 [ 0, %bb ], [ 0, %bb1 ], [ %phi18, %bb16 ]
49+
%phi3 = phi i32 [ 0, %bb ], [ poison, %bb16 ], [ 0, %bb1 ]
50+
%phi4 = phi i32 [ 0, %bb ], [ poison, %bb16 ], [ 0, %bb1 ]
51+
br i1 false, label %bb1, label %bb5
52+
53+
bb5:
54+
%phi6 = phi i32 [ %phi17, %bb16 ], [ 0, %bb1 ]
55+
%phi7 = phi i32 [ %phi19, %bb16 ], [ 0, %bb1 ]
56+
%phi8 = phi double [ 0.000000e+00, %bb16 ], [ 0.000000e+00, %bb1 ]
57+
switch i32 0, label %bb21 [
58+
i32 4, label %bb21
59+
i32 1, label %bb21
60+
i32 0, label %bb9
61+
]
62+
63+
bb9:
64+
%phi10 = phi i32 [ %phi6, %bb21 ], [ 0, %bb5 ]
65+
%phi11 = phi i32 [ %phi7, %bb21 ], [ 0, %bb5 ]
66+
%phi12 = phi i32 [ 0, %bb21 ], [ 0, %bb5 ]
67+
%phi13 = phi double [ 0.000000e+00, %bb21 ], [ 0.000000e+00, %bb5 ]
68+
switch i32 0, label %bb15 [
69+
i32 1, label %bb14
70+
i32 0, label %bb16
71+
]
72+
73+
bb14:
74+
br label %bb16
75+
76+
bb15:
77+
%add = add i32 0, %phi10
78+
br label %bb16
79+
80+
bb16:
81+
%phi17 = phi i32 [ %add, %bb15 ], [ %phi10, %bb14 ], [ 0, %bb9 ]
82+
%phi18 = phi i32 [ %phi11, %bb15 ], [ 0, %bb14 ], [ 0, %bb9 ]
83+
%phi19 = phi i32 [ %phi12, %bb15 ], [ %phi12, %bb14 ], [ 0, %bb9 ]
84+
%phi20 = phi double [ 0.000000e+00, %bb15 ], [ 0.000000e+00, %bb14 ], [ 0.000000e+00, %bb9 ]
85+
br i1 false, label %bb5, label %bb1
86+
87+
bb21:
88+
br label %bb9
89+
}

0 commit comments

Comments
 (0)