Commit ee3a4f4 (parent: 5719fb8)

[SLPVectorizer] Test -1 stride loads. (#158358)

Add a test that generates a -1 stride load, and flags to force this behaviour.

File tree

2 files changed: +99 −1 lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,16 @@ static cl::opt<unsigned> MaxProfitableLoadStride(
198198
"slp-max-stride", cl::init(8), cl::Hidden,
199199
cl::desc("The maximum stride, considered to be profitable."));
200200

201+
static cl::opt<bool>
202+
DisableTreeReorder("slp-disable-tree-reorder", cl::init(false), cl::Hidden,
203+
cl::desc("Disable tree reordering even if it is "
204+
"profitable. Used for testing only."));
205+
206+
static cl::opt<bool>
207+
ForceStridedLoads("slp-force-strided-loads", cl::init(false), cl::Hidden,
208+
cl::desc("Generate strided loads even if they are not "
209+
"profitable. Used for testing only."));
210+
201211
static cl::opt<bool>
202212
ViewSLPTree("view-slp-tree", cl::Hidden,
203213
cl::desc("Display the SLP trees with Graphviz"));
@@ -7770,6 +7780,9 @@ static void combineOrders(MutableArrayRef<unsigned> Order,
77707780
}
77717781

77727782
bool BoUpSLP::isProfitableToReorder() const {
7783+
if (DisableTreeReorder)
7784+
return false;
7785+
77737786
constexpr unsigned TinyVF = 2;
77747787
constexpr unsigned TinyTree = 10;
77757788
constexpr unsigned PhiOpsLimit = 12;
@@ -13027,7 +13040,7 @@ void BoUpSLP::transformNodes() {
1302713040
InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
1302813041
Instruction::Load, VecTy, BaseLI->getPointerOperand(),
1302913042
/*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
13030-
if (StridedCost < OriginalVecCost)
13043+
if (StridedCost < OriginalVecCost || ForceStridedLoads)
1303113044
// Strided load is more profitable than consecutive load + reverse -
1303213045
// transform the node to strided load.
1303313046
E.State = TreeEntry::StridedVectorize;
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=riscv64 -mattr=+m,+v \
3+
; RUN: -passes=slp-vectorizer \
4+
; RUN: -slp-disable-tree-reorder=true -slp-force-strided-loads=true \
5+
; RUN: -S < %s | FileCheck %s
6+
7+
define void @const_stride_reversed(ptr %pl, ptr %ps) {
8+
; CHECK-LABEL: define void @const_stride_reversed(
9+
; CHECK-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[GEP_L15:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 15
11+
; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
12+
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr align 16 [[GEP_L15]], i64 -1, <16 x i1> splat (i1 true), i32 16)
13+
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 16
14+
; CHECK-NEXT: ret void
15+
;
16+
%gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
17+
%gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
18+
%gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
19+
%gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
20+
%gep_l4 = getelementptr inbounds i8, ptr %pl, i64 4
21+
%gep_l5 = getelementptr inbounds i8, ptr %pl, i64 5
22+
%gep_l6 = getelementptr inbounds i8, ptr %pl, i64 6
23+
%gep_l7 = getelementptr inbounds i8, ptr %pl, i64 7
24+
%gep_l8 = getelementptr inbounds i8, ptr %pl, i64 8
25+
%gep_l9 = getelementptr inbounds i8, ptr %pl, i64 9
26+
%gep_l10 = getelementptr inbounds i8, ptr %pl, i64 10
27+
%gep_l11 = getelementptr inbounds i8, ptr %pl, i64 11
28+
%gep_l12 = getelementptr inbounds i8, ptr %pl, i64 12
29+
%gep_l13 = getelementptr inbounds i8, ptr %pl, i64 13
30+
%gep_l14 = getelementptr inbounds i8, ptr %pl, i64 14
31+
%gep_l15 = getelementptr inbounds i8, ptr %pl, i64 15
32+
33+
%load0 = load i8, ptr %gep_l0 , align 16
34+
%load1 = load i8, ptr %gep_l1 , align 16
35+
%load2 = load i8, ptr %gep_l2 , align 16
36+
%load3 = load i8, ptr %gep_l3 , align 16
37+
%load4 = load i8, ptr %gep_l4 , align 16
38+
%load5 = load i8, ptr %gep_l5 , align 16
39+
%load6 = load i8, ptr %gep_l6 , align 16
40+
%load7 = load i8, ptr %gep_l7 , align 16
41+
%load8 = load i8, ptr %gep_l8 , align 16
42+
%load9 = load i8, ptr %gep_l9 , align 16
43+
%load10 = load i8, ptr %gep_l10, align 16
44+
%load11 = load i8, ptr %gep_l11, align 16
45+
%load12 = load i8, ptr %gep_l12, align 16
46+
%load13 = load i8, ptr %gep_l13, align 16
47+
%load14 = load i8, ptr %gep_l14, align 16
48+
%load15 = load i8, ptr %gep_l15, align 16
49+
50+
%gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
51+
%gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
52+
%gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
53+
%gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
54+
%gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
55+
%gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
56+
%gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
57+
%gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
58+
%gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
59+
%gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
60+
%gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
61+
%gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
62+
%gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
63+
%gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
64+
%gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
65+
%gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15
66+
67+
store i8 %load0, ptr %gep_s15, align 16
68+
store i8 %load1, ptr %gep_s14, align 16
69+
store i8 %load2, ptr %gep_s13, align 16
70+
store i8 %load3, ptr %gep_s12, align 16
71+
store i8 %load4, ptr %gep_s11, align 16
72+
store i8 %load5, ptr %gep_s10, align 16
73+
store i8 %load6, ptr %gep_s9, align 16
74+
store i8 %load7, ptr %gep_s8, align 16
75+
store i8 %load8, ptr %gep_s7, align 16
76+
store i8 %load9, ptr %gep_s6, align 16
77+
store i8 %load10, ptr %gep_s5, align 16
78+
store i8 %load11, ptr %gep_s4, align 16
79+
store i8 %load12, ptr %gep_s3, align 16
80+
store i8 %load13, ptr %gep_s2, align 16
81+
store i8 %load14, ptr %gep_s1, align 16
82+
store i8 %load15, ptr %gep_s0, align 16
83+
84+
ret void
85+
}

Comments (0)