Skip to content

Commit 37bde7a

Browse files
committed
[SLP] Fix hanging on small phi-only trees when the cost threshold is adjusted
The vectorizer needs to check whether the tree is too small before attempting to vectorize it, in order to prevent hanging on small trees that consist only of phis.
1 parent 8806311 commit 37bde7a

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12130,6 +12130,30 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
1213012130
}))
1213112131
return true;
1213212132

12133+
// Do not vectorize small tree of phis only, if all vector phis are also
12134+
// gathered.
12135+
if (!ForReduction && SLPCostThreshold.getNumOccurrences() &&
12136+
VectorizableTree.size() <= Limit &&
12137+
all_of(VectorizableTree,
12138+
[&](const std::unique_ptr<TreeEntry> &TE) {
12139+
return (TE->isGather() &&
12140+
(!TE->hasState() ||
12141+
TE->getOpcode() != Instruction::ExtractElement) &&
12142+
count_if(TE->Scalars, IsaPred<ExtractElementInst>) <=
12143+
Limit) ||
12144+
(TE->hasState() &&
12145+
(TE->getOpcode() == Instruction::InsertElement ||
12146+
(TE->getOpcode() == Instruction::PHI &&
12147+
all_of(TE->Scalars, [&](Value *V) {
12148+
return isa<PoisonValue>(V) || MustGather.contains(V);
12149+
}))));
12150+
}) &&
12151+
any_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
12152+
return TE->State == TreeEntry::Vectorize &&
12153+
TE->getOpcode() == Instruction::PHI;
12154+
}))
12155+
return true;
12156+
1213312157
// We can vectorize the tree if its size is greater than or equal to the
1213412158
// minimum size specified by the MinTreeSize command line option.
1213512159
if (VectorizableTree.size() >= MinTreeSize)
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-threshold=-11 < %s | FileCheck %s
3+
4+
define float @test(ptr %call78) {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> <ptr null, ptr poison>, ptr [[CALL78:%.*]], i32 1
8+
; CHECK-NEXT: br label [[FOR_BODY194:%.*]]
9+
; CHECK: for.body194:
10+
; CHECK-NEXT: [[INDVARS_IV132:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[FOR_BODY194]] ]
11+
; CHECK-NEXT: [[CURRENTW_031:%.*]] = phi ptr [ [[CALL78]], [[ENTRY]] ], [ [[PREVIOUSW_030:%.*]], [[FOR_BODY194]] ]
12+
; CHECK-NEXT: [[PREVIOUSW_030]] = phi ptr [ null, [[ENTRY]] ], [ [[CURRENTW_031]], [[FOR_BODY194]] ]
13+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP0]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY194]] ]
14+
; CHECK-NEXT: store float 0.000000e+00, ptr [[CURRENTW_031]], align 4
15+
; CHECK-NEXT: tail call void null(ptr [[PREVIOUSW_030]], ptr null, ptr null, i32 0, i32 0, ptr null, ptr null, i32 0)
16+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[CURRENTW_031]], i32 0
17+
; CHECK-NEXT: [[TMP3]] = insertelement <2 x ptr> [[TMP2]], ptr [[PREVIOUSW_030]], i32 1
18+
; CHECK-NEXT: br i1 false, label [[FOR_END286_LOOPEXIT:%.*]], label [[FOR_BODY194]]
19+
; CHECK: for.end286.loopexit:
20+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x ptr> [ [[TMP1]], [[FOR_BODY194]] ]
21+
; CHECK-NEXT: ret float 0.000000e+00
22+
;
23+
entry:
24+
br label %for.body194
25+
26+
for.body194:
27+
%indvars.iv132 = phi i64 [ 0, %entry ], [ 0, %for.body194 ]
28+
%currentw.031 = phi ptr [ %call78, %entry ], [ %previousw.030, %for.body194 ]
29+
%previousw.030 = phi ptr [ null, %entry ], [ %currentw.031, %for.body194 ]
30+
store float 0.000000e+00, ptr %currentw.031, align 4
31+
tail call void null(ptr %previousw.030, ptr null, ptr null, i32 0, i32 0, ptr null, ptr null, i32 0)
32+
br i1 false, label %for.end286.loopexit, label %for.body194
33+
34+
for.end286.loopexit:
35+
%currentw.031.lcssa = phi ptr [ %currentw.031, %for.body194 ]
36+
%previousw.030.lcssa = phi ptr [ %previousw.030, %for.body194 ]
37+
ret float 0.000000e+00
38+
}
39+
40+

0 commit comments

Comments
 (0)