Skip to content

Commit 9207d63

Browse files
committed
[LV] strip TailFoldingStyle::DataWithoutLaneMask
TailFoldingStyle::DataWithoutLaneMask has just one use in LoopVectorize, introduced by 413a66f ([LV, VP]VP intrinsics support for the Loop Vectorizer + adding new tail-folding mode using EVL.). That use is completely unnecessary, because @llvm.get.active.lane.mask is unrelated to EVL. Moreover, since 243a532 ([SelectionDAG] Lower @llvm.get.active.lane.mask to setcc), SelectionDAG automatically detects whether a target supports the @llvm.get.active.lane.mask intrinsic, and lowers it to equivalent instructions on targets where it is not preferred.
1 parent c1622ca commit 9207d63

File tree

6 files changed

+77
-149
lines changed

6 files changed

+77
-149
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,6 @@ static cl::opt<TailFoldingStyle> ForceTailFoldingStyle(
249249
clEnumValN(
250250
TailFoldingStyle::Data, "data",
251251
"Create lane mask for data only, using active.lane.mask intrinsic"),
252-
clEnumValN(TailFoldingStyle::DataWithoutLaneMask,
253-
"data-without-lane-mask",
254-
"Create lane mask with compare/stepvector"),
255252
clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control",
256253
"Create lane mask using active.lane.mask intrinsic, and use "
257254
"it for both data and control flow"),
@@ -1480,12 +1477,10 @@ class LoopVectorizationCostModel {
14801477
// FIXME: implement support for max safe dependency distance.
14811478
Legal->isSafeForAnyVectorWidth();
14821479
if (!EVLIsLegal) {
1483-
// If for some reason EVL mode is unsupported, fallback to
1484-
// DataWithoutLaneMask to try to vectorize the loop with folded tail
1485-
// in a generic way.
1480+
// If for some reason EVL mode is unsupported, fallback to Data to try to
1481+
// vectorize the loop with folded tail in a generic way.
14861482
ChosenTailFoldingStyle =
1487-
std::make_pair(TailFoldingStyle::DataWithoutLaneMask,
1488-
TailFoldingStyle::DataWithoutLaneMask);
1483+
std::make_pair(TailFoldingStyle::Data, TailFoldingStyle::Data);
14891484
LLVM_DEBUG(
14901485
dbgs()
14911486
<< "LV: Preference for VP intrinsics indicated. Will "

llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=none < %s | FileCheck %s --check-prefix=NONE
33
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data < %s | FileCheck %s --check-prefix=DATA
4-
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-without-lane-mask < %s | FileCheck %s --check-prefix=DATA_NO_LANEMASK
54
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-and-control < %s | FileCheck %s --check-prefix=DATA_AND_CONTROL
65
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-and-control-without-rt-check < %s | FileCheck %s --check-prefix=DATA_AND_CONTROL_NO_RT_CHECK
76

@@ -97,59 +96,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
9796
; DATA: while.end.loopexit:
9897
; DATA-NEXT: ret void
9998
;
100-
; DATA_NO_LANEMASK-LABEL: @simple_memset_tailfold(
101-
; DATA_NO_LANEMASK-NEXT: entry:
102-
; DATA_NO_LANEMASK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
103-
; DATA_NO_LANEMASK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
104-
; DATA_NO_LANEMASK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
105-
; DATA_NO_LANEMASK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
106-
; DATA_NO_LANEMASK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
107-
; DATA_NO_LANEMASK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
108-
; DATA_NO_LANEMASK: vector.ph:
109-
; DATA_NO_LANEMASK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
110-
; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
111-
; DATA_NO_LANEMASK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
112-
; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
113-
; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
114-
; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
115-
; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
116-
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
117-
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
118-
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
119-
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
120-
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
121-
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
122-
; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]]
123-
; DATA_NO_LANEMASK: vector.body:
124-
; DATA_NO_LANEMASK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ]
125-
; DATA_NO_LANEMASK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
126-
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX1]], i64 0
127-
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
128-
; DATA_NO_LANEMASK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
129-
; DATA_NO_LANEMASK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
130-
; DATA_NO_LANEMASK-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT3]], [[TMP11]]
131-
; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
132-
; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]]
133-
; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
134-
; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[TMP12]])
135-
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]]
136-
; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
137-
; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
138-
; DATA_NO_LANEMASK: middle.block:
139-
; DATA_NO_LANEMASK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
140-
; DATA_NO_LANEMASK: scalar.ph:
141-
; DATA_NO_LANEMASK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
142-
; DATA_NO_LANEMASK-NEXT: br label [[WHILE_BODY:%.*]]
143-
; DATA_NO_LANEMASK: while.body:
144-
; DATA_NO_LANEMASK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
145-
; DATA_NO_LANEMASK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
146-
; DATA_NO_LANEMASK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4
147-
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
148-
; DATA_NO_LANEMASK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
149-
; DATA_NO_LANEMASK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
150-
; DATA_NO_LANEMASK: while.end.loopexit:
151-
; DATA_NO_LANEMASK-NEXT: ret void
152-
;
15399
; DATA_AND_CONTROL-LABEL: @simple_memset_tailfold(
154100
; DATA_AND_CONTROL-NEXT: entry:
155101
; DATA_AND_CONTROL-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)

llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99
define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
1010
; CHECK-LABEL: VPlan 'Initial VPlan for VF={2,4},UF>=1' {
1111
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
12-
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
13-
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
12+
; CHECK-NEXT: Live-in vp<[[VTC:%.*]]> = vector-trip-count
1413
; CHECK-NEXT: Live-in ir<%N> = original trip-count
1514
; CHECK-EMPTY:
1615
; CHECK-NEXT: vector.ph:
@@ -19,17 +18,16 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
1918
; CHECK-NEXT: <x1> vector loop: {
2019
; CHECK-NEXT: vector.body:
2120
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_INC:%.*]]>
22-
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
23-
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
21+
; CHECK-NEXT: vp<[[STEPS:%.*]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
22+
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = active lane mask vp<[[STEPS]]>, ir<%N>
2423
; CHECK-NEXT: Successor(s): pred.store
2524
; CHECK-EMPTY:
2625
; CHECK-NEXT: <xVFxUF> pred.store: {
2726
; CHECK-NEXT: pred.store.entry:
28-
; CHECK-NEXT: BRANCH-ON-MASK vp<[[CMP]]>
27+
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
2928
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
3029
; CHECK-EMPTY:
3130
; CHECK-NEXT: pred.store.if:
32-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
3331
; CHECK-NEXT: REPLICATE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<[[STEPS]]>
3432
; CHECK-NEXT: REPLICATE ir<%0> = load ir<%arrayidx>
3533
; CHECK-NEXT: REPLICATE ir<%arrayidx2> = getelementptr inbounds ir<%c>, vp<[[STEPS]]>
@@ -45,7 +43,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
4543
; CHECK-NEXT: Successor(s): for.body.2
4644
; CHECK-EMPTY:
4745
; CHECK-NEXT: for.body.2:
48-
; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
46+
; CHECK-NEXT: EMIT vp<[[CAN_INC]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
4947
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VTC]]>
5048
; CHECK-NEXT: No successors
5149
; CHECK-NEXT: }

0 commit comments

Comments
 (0)