Skip to content

Commit 79682c4

Browse files
committed
[SLP] Check that the buildvector root is not part of the graph before deletion
Even if the buildvector root has no uses, it may still be needed as part of the graph, so we must check that it is not part of the graph before deleting it. Fixes #116852
1 parent d8a1c6d commit 79682c4

File tree

2 files changed

+59
-1
lines changed

2 files changed

+59
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13614,7 +13614,10 @@ Value *BoUpSLP::gather(
1361413614
} else {
1361513615
Vec = CreateShuffle(Root, Vec, Mask);
1361613616
if (auto *OI = dyn_cast<Instruction>(OriginalRoot);
13617-
OI && OI->hasNUses(0))
13617+
OI && OI->hasNUses(0) &&
13618+
none_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
13619+
return TE->VectorizedValue == OI;
13620+
}))
1361813621
eraseInstruction(OI);
1361913622
}
1362013623
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
8+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> <float poison, float 0.000000e+00, float poison, float poison>, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
9+
; CHECK-NEXT: br label %[[BB1:.*]]
10+
; CHECK: [[BB1]]:
11+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP9:%.*]], %[[BB1]] ]
12+
; CHECK-NEXT: [[FMUL:%.*]] = fmul float 0.000000e+00, 0.000000e+00
13+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[FMUL]], i32 2
14+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
15+
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP0]], [[TMP3]]
16+
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], zeroinitializer
17+
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[TMP5]], zeroinitializer
18+
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
19+
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> [[TMP7]], <4 x i32> zeroinitializer
20+
; CHECK-NEXT: [[TMP9]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP8]])
21+
; CHECK-NEXT: br label %[[BB1]]
22+
;
23+
bb:
24+
br label %bb1
25+
26+
bb1:
27+
%phi = phi i32 [ 0, %bb ], [ %or21, %bb1 ]
28+
%sitofp = sitofp i32 0 to float
29+
%fadd = fadd float %sitofp, %sitofp
30+
%fadd2 = fadd float %fadd, 0.000000e+00
31+
%fcmp = fcmp ogt float %fadd2, 0.000000e+00
32+
%select = select i1 %fcmp, i32 0, i32 0
33+
%select3 = select i1 false, i32 %select, i32 0
34+
%fadd4 = fadd float %sitofp, 0.000000e+00
35+
%fadd5 = fadd float %fadd4, 0.000000e+00
36+
%fcmp6 = fcmp ogt float %fadd5, 0.000000e+00
37+
%select7 = select i1 %fcmp6, i32 0, i32 0
38+
%select8 = select i1 false, i32 %select7, i32 0
39+
%or = or i32 %select3, %select8
40+
%sitofp9 = sitofp i32 0 to float
41+
%fmul = fmul float 0.000000e+00, 0.000000e+00
42+
%fadd10 = fadd float %sitofp9, %fmul
43+
%fadd11 = fadd float %fadd10, 0.000000e+00
44+
%fcmp12 = fcmp ogt float %fadd11, 0.000000e+00
45+
%select13 = select i1 %fcmp12, i32 0, i32 0
46+
%select14 = select i1 false, i32 %select13, i32 0
47+
%or15 = or i32 %select14, %or
48+
%fadd16 = fadd float %fmul, 0.000000e+00
49+
%fadd17 = fadd float %fadd16, 0.000000e+00
50+
%fcmp18 = fcmp ogt float %fadd17, 0.000000e+00
51+
%select19 = select i1 %fcmp18, i32 0, i32 0
52+
%select20 = select i1 false, i32 %select19, i32 0
53+
%or21 = or i32 %or15, %select20
54+
br label %bb1
55+
}

0 commit comments

Comments
 (0)