15 changes: 14 additions & 1 deletion llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -198,6 +198,16 @@ static cl::opt<unsigned> MaxProfitableLoadStride(
"slp-max-stride", cl::init(8), cl::Hidden,
cl::desc("The maximum stride, considered to be profitable."));

static cl::opt<bool>
DisableTreeReorder("slp-disable-tree-reorder", cl::init(false), cl::Hidden,
cl::desc("Disable tree reordering even if it is "
"profitable. Used for testing only."));

static cl::opt<bool>
ForceStridedLoads("slp-force-strided-loads", cl::init(false), cl::Hidden,
cl::desc("Generate strided loads even if they are not "
"profitable. Used for testing only."));

static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
cl::desc("Display the SLP trees with Graphviz"));
@@ -7770,6 +7780,9 @@ static void combineOrders(MutableArrayRef<unsigned> Order,
}

bool BoUpSLP::isProfitableToReorder() const {
if (DisableTreeReorder)
return false;

constexpr unsigned TinyVF = 2;
constexpr unsigned TinyTree = 10;
constexpr unsigned PhiOpsLimit = 12;
@@ -13027,7 +13040,7 @@ void BoUpSLP::transformNodes() {
InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
Instruction::Load, VecTy, BaseLI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
-      if (StridedCost < OriginalVecCost)
+      if (StridedCost < OriginalVecCost || ForceStridedLoads)
// Strided load is more profitable than consecutive load + reverse -
// transform the node to strided load.
E.State = TreeEntry::StridedVectorize;
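For context, the two costs compared here correspond to two lowerings of a load group whose elements are used in reverse order. A minimal IR sketch (value names and the <4 x i32> type are illustrative only, not taken from this patch):

  ; Consecutive vector load + reverse shuffle (the OriginalVecCost lowering):
  %wide = load <4 x i32>, ptr %p, align 16
  %rev = shufflevector <4 x i32> %wide, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

  ; Strided load with a negative stride (the StridedCost lowering): start at
  ; the last element and step back 4 bytes per lane, so the lanes arrive
  ; already reversed and no shuffle is needed.
  %last = getelementptr inbounds i8, ptr %p, i64 12
  %rev.strided = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 4 %last, i64 -4, <4 x i1> splat (i1 true), i32 4)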
85 changes: 85 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=riscv64 -mattr=+m,+v \
; RUN: -passes=slp-vectorizer \
; RUN: -slp-disable-tree-reorder=true -slp-force-strided-loads=true \
; RUN: -S < %s | FileCheck %s

define void @const_stride_reversed(ptr %pl, ptr %ps) {
; CHECK-LABEL: define void @const_stride_reversed(
; CHECK-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[GEP_L15:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 15
; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr align 16 [[GEP_L15]], i64 -1, <16 x i1> splat (i1 true), i32 16)
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 16
; CHECK-NEXT: ret void
;
%gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
%gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
%gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
%gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
%gep_l4 = getelementptr inbounds i8, ptr %pl, i64 4
%gep_l5 = getelementptr inbounds i8, ptr %pl, i64 5
%gep_l6 = getelementptr inbounds i8, ptr %pl, i64 6
%gep_l7 = getelementptr inbounds i8, ptr %pl, i64 7
%gep_l8 = getelementptr inbounds i8, ptr %pl, i64 8
%gep_l9 = getelementptr inbounds i8, ptr %pl, i64 9
%gep_l10 = getelementptr inbounds i8, ptr %pl, i64 10
%gep_l11 = getelementptr inbounds i8, ptr %pl, i64 11
%gep_l12 = getelementptr inbounds i8, ptr %pl, i64 12
%gep_l13 = getelementptr inbounds i8, ptr %pl, i64 13
%gep_l14 = getelementptr inbounds i8, ptr %pl, i64 14
%gep_l15 = getelementptr inbounds i8, ptr %pl, i64 15

%load0 = load i8, ptr %gep_l0 , align 16
%load1 = load i8, ptr %gep_l1 , align 16
%load2 = load i8, ptr %gep_l2 , align 16
%load3 = load i8, ptr %gep_l3 , align 16
%load4 = load i8, ptr %gep_l4 , align 16
%load5 = load i8, ptr %gep_l5 , align 16
%load6 = load i8, ptr %gep_l6 , align 16
%load7 = load i8, ptr %gep_l7 , align 16
%load8 = load i8, ptr %gep_l8 , align 16
%load9 = load i8, ptr %gep_l9 , align 16
%load10 = load i8, ptr %gep_l10, align 16
%load11 = load i8, ptr %gep_l11, align 16
%load12 = load i8, ptr %gep_l12, align 16
%load13 = load i8, ptr %gep_l13, align 16
%load14 = load i8, ptr %gep_l14, align 16
%load15 = load i8, ptr %gep_l15, align 16

%gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
%gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
%gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
%gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
%gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
%gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
%gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
%gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
%gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
%gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
%gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
%gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
%gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
%gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
%gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
%gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15

store i8 %load0, ptr %gep_s15, align 16
store i8 %load1, ptr %gep_s14, align 16
store i8 %load2, ptr %gep_s13, align 16
store i8 %load3, ptr %gep_s12, align 16
store i8 %load4, ptr %gep_s11, align 16
store i8 %load5, ptr %gep_s10, align 16
store i8 %load6, ptr %gep_s9, align 16
store i8 %load7, ptr %gep_s8, align 16
store i8 %load8, ptr %gep_s7, align 16
store i8 %load9, ptr %gep_s6, align 16
store i8 %load10, ptr %gep_s5, align 16
store i8 %load11, ptr %gep_s4, align 16
store i8 %load12, ptr %gep_s3, align 16
store i8 %load13, ptr %gep_s2, align 16
store i8 %load14, ptr %gep_s1, align 16
store i8 %load15, ptr %gep_s0, align 16

ret void
}
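Note on the CHECK lines above: the strided load starts at %pl + 15 with a byte stride of -1, so lane i reads from %pl + 15 - i. The sixteen scalar loads therefore arrive already in reversed order, and the sixteen reversed scalar stores collapse into the single consecutive <16 x i8> store, with no shufflevector required.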